From b38395bf74636186e03e6dc67434b360b358aa7b Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 26 Apr 2022 21:44:03 -0500 Subject: [PATCH] Add fideslang python module (#36) * Add fideslang python module * move the data example files into their own dir * add alllll of the files from fides * remove all of the non-fideslang code and update the setup.py * docker build is working * remove non fideslang tests * get `make check-all` passing * remove the old docs from fidesctl * all checks passing * serving docs locally works * update the codeowners and set docker to only push on tags Co-authored-by: Thomas La Piana --- .dockerignore | 11 + .gitattributes | 1 + .github/CODEOWNERS | 8 + .github/pull_request_template.md | 21 + .github/workflows/docker.yaml | 30 + .github/workflows/pr_checks.yml | 133 ++ .github/workflows/publish_package.yaml | 23 + .gitignore | 206 ++- .pre-commit-config.yaml | 39 + CHANGELOG.md | 21 + CODE_OF_CONDUCT.md | 5 + CONTRIBUTING.md | 5 + Dockerfile | 70 + MANIFEST.in | 6 + Makefile | 123 ++ README.md | 22 +- .../data_categories.csv | 158 +- .../data_categories.json | 0 .../data_categories.yml | 0 .../data_qualifiers.csv | 12 +- .../data_qualifiers.json | 0 .../data_qualifiers.yml | 0 .../data_subjects.csv | 34 +- .../data_subjects.json | 0 .../data_subjects.yml | 0 data_uses.csv => data_files/data_uses.csv | 50 +- data_uses.json => data_files/data_uses.json | 0 data_uses.yml => data_files/data_uses.yml | 0 demo_resources/demo_dataset.yml | 51 + demo_resources/demo_extended_taxonomy.yml | 24 + demo_resources/demo_organization.yml | 22 + demo_resources/demo_policy.yml | 20 + demo_resources/demo_registry.yml | 4 + demo_resources/demo_system.yml | 40 + dev-requirements.txt | 11 + docker-compose.yml | 19 + mkdocs/Dockerfile | 19 + optional-requirements.txt | 1 + pyproject.toml | 80 + requirements.txt | 3 + scripts/dev-requirements.txt | 1 - scripts/import_from_fidesctl.sh | 4 - setup.cfg | 31 + setup.py | 44 + src/__init__.py | 0 src/fideslang/__init__.py | 42 + src/fideslang/_version.py | 562 ++++++ src/fideslang/default_fixtures.py | 267 +++ src/fideslang/default_taxonomy.py | 847 +++++++++ src/fideslang/manifests.py | 84 + src/fideslang/models.py | 714 +++++++ src/fideslang/parse.py | 45 + src/fideslang/relationships.py | 81 + src/fideslang/utils.py | 22 + src/fideslang/validation.py | 95 + tests/conftest.py | 200 ++ .../failing_dataset_collection_taxonomy.yml | 54 + tests/data/failing_dataset_field_taxonomy.yml | 55 + tests/data/failing_dataset_taxonomy.yml | 54 + tests/data/failing_declaration_taxonomy.yml | 35 + tests/data/failing_nested_dataset.yml | 57 + tests/data/passing_declaration_taxonomy.yml | 35 + tests/data/sample_hierarchy_figures.json | 937 ++++++++++ tests/data/sample_manifest.yml | 13 + tests/fideslang/test_manifests.py | 157 ++ tests/fideslang/test_parse.py | 83 + tests/fideslang/test_relationships.py | 184 ++ tests/fideslang/test_validation.py | 278 +++ versioneer.py | 1648 +++++++++++++++++ 69 files changed, 7759 insertions(+), 142 deletions(-) create mode 100644 .dockerignore create mode 100644 .gitattributes create mode 100644 .github/CODEOWNERS create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/docker.yaml create mode 100644 .github/workflows/pr_checks.yml create mode 100644 .github/workflows/publish_package.yaml create mode 100644 .pre-commit-config.yaml create mode 100644 CHANGELOG.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 Dockerfile create mode 100644 
MANIFEST.in create mode 100644 Makefile rename data_categories.csv => data_files/data_categories.csv (99%) rename data_categories.json => data_files/data_categories.json (100%) rename data_categories.yml => data_files/data_categories.yml (100%) rename data_qualifiers.csv => data_files/data_qualifiers.csv (99%) rename data_qualifiers.json => data_files/data_qualifiers.json (100%) rename data_qualifiers.yml => data_files/data_qualifiers.yml (100%) rename data_subjects.csv => data_files/data_subjects.csv (99%) rename data_subjects.json => data_files/data_subjects.json (100%) rename data_subjects.yml => data_files/data_subjects.yml (100%) rename data_uses.csv => data_files/data_uses.csv (99%) rename data_uses.json => data_files/data_uses.json (100%) rename data_uses.yml => data_files/data_uses.yml (100%) create mode 100644 demo_resources/demo_dataset.yml create mode 100644 demo_resources/demo_extended_taxonomy.yml create mode 100644 demo_resources/demo_organization.yml create mode 100644 demo_resources/demo_policy.yml create mode 100644 demo_resources/demo_registry.yml create mode 100644 demo_resources/demo_system.yml create mode 100644 dev-requirements.txt create mode 100644 docker-compose.yml create mode 100644 mkdocs/Dockerfile create mode 100644 optional-requirements.txt create mode 100644 pyproject.toml create mode 100644 requirements.txt delete mode 100644 scripts/dev-requirements.txt delete mode 100755 scripts/import_from_fidesctl.sh create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 src/__init__.py create mode 100644 src/fideslang/__init__.py create mode 100644 src/fideslang/_version.py create mode 100644 src/fideslang/default_fixtures.py create mode 100644 src/fideslang/default_taxonomy.py create mode 100644 src/fideslang/manifests.py create mode 100644 src/fideslang/models.py create mode 100644 src/fideslang/parse.py create mode 100644 src/fideslang/relationships.py create mode 100644 src/fideslang/utils.py create mode 100644 src/fideslang/validation.py create mode 100644 tests/conftest.py create mode 100644 tests/data/failing_dataset_collection_taxonomy.yml create mode 100644 tests/data/failing_dataset_field_taxonomy.yml create mode 100644 tests/data/failing_dataset_taxonomy.yml create mode 100644 tests/data/failing_declaration_taxonomy.yml create mode 100644 tests/data/failing_nested_dataset.yml create mode 100644 tests/data/passing_declaration_taxonomy.yml create mode 100644 tests/data/sample_hierarchy_figures.json create mode 100644 tests/data/sample_manifest.yml create mode 100644 tests/fideslang/test_manifests.py create mode 100644 tests/fideslang/test_parse.py create mode 100644 tests/fideslang/test_relationships.py create mode 100644 tests/fideslang/test_validation.py create mode 100644 versioneer.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..ff7ca1ca --- /dev/null +++ b/.dockerignore @@ -0,0 +1,11 @@ +# Ignore existing build artifacts +build/ +dist/ +src/fideslang.egg-info/ + +# Ignore the docs +mkdocs/ + +# Ignore dev files +.github/ +.devcontainer/ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..d93720f2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +src/fideslang/_version.py export-subst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..194119f6 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,8 @@ +# Set the fidesctl core team as the default codeowners + +* @ethyca/fides-control + +# Set the product/tech writing team as owners for 
the docs + +mkdocs/ @ethyca/docs-authors +README.md @ethyca/docs-authors diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..3a5fed32 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,21 @@ +Closes + +### Code Changes + +* [ ] _list your code changes here_ + +### Steps to Confirm + +* [ ] _list any manual steps taken to confirm the changes_ + +### Pre-Merge Checklist + +* [ ] All CI Pipelines Succeeded +* [ ] Documentation Updated +* [ ] Issue Requirements are Met +* [ ] Relevant Follow-Up Issues Created +* [ ] Update `CHANGELOG.md` + +### Description Of Changes + +_Write some things here about the changes and any potential caveats_ diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml new file mode 100644 index 00000000..f3152349 --- /dev/null +++ b/.github/workflows/docker.yaml @@ -0,0 +1,30 @@ +name: Docker Build & Push + +on: + push: + tags: + - "*" + +env: + DOCKER_USER: ethycaci + DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} + +jobs: + push-fidesctl: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # This is required to properly tag images + + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ env.DOCKER_USER }} + password: ${{ env.DOCKER_TOKEN }} + + - name: Build Fideslang + run: make build + + - name: Push Fideslang + run: make push diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml new file mode 100644 index 00000000..68b987af --- /dev/null +++ b/.github/workflows/pr_checks.yml @@ -0,0 +1,133 @@ +name: PR Checks + +on: + pull_request: + branches: + - main + +env: + CONTAINER: fideslang-local + IMAGE: ethyca/fideslang:local + +jobs: + Build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + + - name: Build fideslang container + uses: docker/build-push-action@v2 + with: + builder: ${{ steps.buildx.outputs.name }} + context: . 
+ target: prod + outputs: type=docker,dest=/tmp/${{ env.CONTAINER }}.tar + push: false + tags: ${{ env.IMAGE }} + + - name: Upload fideslang container + uses: actions/upload-artifact@v2 + with: + name: ${{ env.CONTAINER }} + path: /tmp/${{ env.CONTAINER }}.tar + retention-days: 1 + + Black: + needs: Build + runs-on: ubuntu-latest + steps: + - name: Download fideslang container + uses: actions/download-artifact@v2 + with: + name: ${{ env.CONTAINER }} + path: /tmp/ + + - name: Load fideslang image + run: docker load --input /tmp/${{ env.CONTAINER }}.tar + + - name: Checkout + uses: actions/checkout@v2 + + - name: Run formatter + run: make black + + Pylint: + needs: Build + runs-on: ubuntu-latest + steps: + - name: Download fideslang container + uses: actions/download-artifact@v2 + with: + name: ${{ env.CONTAINER }} + path: /tmp/ + + - name: Load fideslang image + run: docker load --input /tmp/${{ env.CONTAINER }}.tar + + - name: Checkout + uses: actions/checkout@v2 + + - name: Run linter + run: make pylint + + Mypy: + needs: Build + runs-on: ubuntu-latest + steps: + - name: Download fideslang container + uses: actions/download-artifact@v2 + with: + name: ${{ env.CONTAINER }} + path: /tmp/ + + - name: Load fideslang image + run: docker load --input /tmp/${{ env.CONTAINER }}.tar + + - name: Checkout + uses: actions/checkout@v2 + + - name: Run typechecker + run: make mypy + + Xenon: + needs: Build + runs-on: ubuntu-latest + steps: + - name: Download fideslang container + uses: actions/download-artifact@v2 + with: + name: ${{ env.CONTAINER }} + path: /tmp/ + + - name: Load fideslang image + run: docker load --input /tmp/${{ env.CONTAINER }}.tar + + - name: Checkout + uses: actions/checkout@v2 + + - name: Run cyclomatic complexity check + run: make xenon + + Pytest: + needs: Build + runs-on: ubuntu-latest + steps: + - name: Download fideslang container + uses: actions/download-artifact@v2 + with: + name: ${{ env.CONTAINER }} + path: /tmp/ + + - name: Load fideslang image + run: docker load --input /tmp/${{ env.CONTAINER }}.tar + + - name: Checkout + uses: actions/checkout@v2 + + - name: Run unit test suite + run: make pytest diff --git a/.github/workflows/publish_package.yaml b/.github/workflows/publish_package.yaml new file mode 100644 index 00000000..87321f2b --- /dev/null +++ b/.github/workflows/publish_package.yaml @@ -0,0 +1,23 @@ +name: Publish fidesctl + +on: + push: + tags: + - "*" + +jobs: + upload_to_pypi: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Install Twine + run: pip install twine + + - name: Twine Upload + run: | + python setup.py sdist + twine upload dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} diff --git a/.gitignore b/.gitignore index 8dfee9df..9803eb3b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,206 @@ +## generic files to ignore +*~ +*.lock +*.DS_Store +*.swp +*.out + +# rails specific +*.sqlite3 +config/database.yml +log/* +tmp/* + +# java specific +*.class + +# docs +mkdocs/docs/site/ + +# python specific +*.pyc + +# xcode/iphone specific +build/* +*.pbxuser +*.mode2v3 +*.mode1v3 +*.perspective +*.perspectivev3 +*~.nib + +# akka specific +logs/* + +# sbt specific +target/ +project/boot +lib_managed/* +project/build/target +project/build/lib_managed +project/build/src_managed +project/plugins/lib_managed +project/plugins/target +project/plugins/src_managed +project/plugins/project +.bsp + +core/lib_managed +core/target +pubsub/lib_managed +pubsub/target + +# eclipse specific +.metadata 
+jrebel.lic +.settings +.classpath +.project + +.ensime* +*.sublime-* +.cache + +# intellij +*.eml +*.iml +*.ipr +*.iws +.*.sw? +.idea + +# metals +.metals +.bloop +project/metals.sbt + +# paulp script +/.lib/ + +# Editors +.vscode/ +.idea/ + +# Vagrant +.vagrant/ + +# Mac/OSX +.DS_Store + +# Windows +Thumbs.db + +# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +*.pyc + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*egg-info/ +.installed.cfg +*.egg +MANIFEST +conda-out/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ venv/ -mkdocs/site +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..ab623b9a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,39 @@ +minimum_pre_commit_version: "2" + +repos: + - repo: local + hooks: + - id: docker + name: docker + entry: make build-local + files: "^src/" + types_or: [file, python] + language: system + + - id: black + name: black + entry: make black + files: "^src/" + types_or: [file, python] + language: system + + - id: mypy + name: mypy + entry: make mypy + files: "^src/" + types_or: [file, python] + language: system + + - id: xenon + name: xenon + entry: make xenon + files: "^src/" + types_or: [file, python] + language: system + + - id: pylint + name: pylint + entry: make pylint + files: "^src/" + types_or: [file, python] + language: system diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..cc999122 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,21 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/) + +The types of changes are: + +* `Added` for new features. +* `Changed` for changes in existing functionality. +* `Developer Experience` for changes in developer workflow or tooling. +* `Deprecated` for soon-to-be removed features. +* `Removed` for now removed features. +* `Fixed` for any bug fixes. +* `Security` in case of vulnerabilities. 
+ +## [Unreleased](https://github.com/ethyca/fideslang/compare/1.0.0...main) + +### Added + +* Created the fideslang standalone python module diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..58bbf4e8 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,5 @@ +## Fides Code of Conduct + +The Fides project, which includes Fideslang, Fidesops, and Fidesctl, adheres to the following [Code of Conduct](https://ethyca.github.io/fides/community/code_of_conduct/). + +The Fides core team welcomes any contributions and suggestions to help make the community a better place 🤝 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..baf64add --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,5 @@ +## Fides Contribution Guidelines + +The Fides project, which includes Fideslang, Fidesops, and Fidesctl, adheres to the following [Contribution Guidelines](https://ethyca.github.io/fides/development/overview/). + +The Fides core team welcomes any contributions and suggestions to help make the community a better place 🤝 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..662c72b5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,70 @@ +FROM --platform=linux/amd64 python:3.8-slim-buster as base + +# Update pip in the base image since we'll use it everywhere +RUN pip install -U pip + +####################### +## Tool Installation ## +####################### + +FROM base as builder + +RUN : \ + && apt-get update \ + && apt-get install \ + -y --no-install-recommends \ + curl \ + git \ + ipython \ + make \ + vim \ + g++ \ + gnupg \ + gcc \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +######################### +## Python Dependencies ## +######################### + +COPY dev-requirements.txt dev-requirements.txt +RUN pip install -r dev-requirements.txt + +COPY requirements.txt requirements.txt +RUN pip install -r requirements.txt + +COPY optional-requirements.txt optional-requirements.txt +RUN pip install -r optional-requirements.txt + +############################### +## General Application Setup ## +############################### + +COPY . 
/fideslang +WORKDIR /fideslang + +# Immediately flush to stdout, globally +ENV PYTHONUNBUFFERED=TRUE + +# Enable detection of running within Docker +ENV RUNNING_IN_DOCKER=TRUE + +################################### +## Application Development Setup ## +################################### + +FROM builder as dev + +# Install fideslang as a symlink +RUN pip install -e ".[all]" + +################################## +## Production Application Setup ## +################################## + +FROM builder as prod + +# Install without a symlink +RUN python setup.py sdist +RUN pip install dist/fideslang-*.tar.gz diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..08070873 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +include LICENSE +include README.md +include requirements.txt +include dev-requirements.txt +include versioneer.py +include src/fideslang/_version.py diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..53dada1a --- /dev/null +++ b/Makefile @@ -0,0 +1,123 @@ +.DEFAULT_GOAL := help + +#################### +# CONSTANTS +#################### +REGISTRY := ethyca +IMAGE_TAG := $(shell git fetch --force --tags && git describe --tags --dirty --always) + +# Image Names & Tags +IMAGE_NAME := fideslang +IMAGE := $(REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG) +IMAGE_LOCAL := $(REGISTRY)/$(IMAGE_NAME):local +IMAGE_LATEST := $(REGISTRY)/$(IMAGE_NAME):latest + +# Disable TTY to perserve output within Github Actions logs +# CI env variable is always set to true in Github Actions +ifeq "$(CI)" "true" + CI_ARGS:=--no-TTY +endif + +# Run in Compose +RUN = docker compose run --rm $(CI_ARGS) $(IMAGE_NAME) + +.PHONY: help +help: + @echo -------------------- + @echo Development Targets: + @echo ---- + @echo build - Builds the fidesctl Docker image. + @echo ---- + @echo check-all - Run all CI checks except for externally dependent ones. + @echo ---- + @echo clean - Runs Docker commands to clean up the docker local environment. + @echo ---- + @echo shell - Spins up the database, API, and starts a shell within the API container to run CLI commands. + @echo ---- + @echo docs-serve - Spins up the docs server on localhost:8000 + @echo -------------------- + +#################### +# Dev +#################### + +.PHONY: shell +shell: build-local + @echo "Setting up a local development shell... (press CTRL-D to exit)" + @$(RUN) /bin/bash + @make teardown + +#################### +# Docker +#################### + +build: + docker build --target=prod --tag $(IMAGE) . + +build-local: + docker build --target=dev --tag $(IMAGE_LOCAL) . + +# The production image is used for running tests in CI +build-local-prod: + docker build --target=prod --tag $(IMAGE_LOCAL) . + +push: build + docker tag $(IMAGE) $(IMAGE_LATEST) + docker push $(IMAGE) + docker push $(IMAGE_LATEST) + +#################### +# CI +#################### + +black: + @$(RUN) black --check src/ + +# The order of dependent targets here is intentional +check-all: teardown build-local-prod check-install black \ + pylint mypy xenon pytest + @echo "Running formatter, linter, typechecker and tests..." + +check-install: + @echo "Checking that fidesctl is installed..." 
+ @$(RUN) python -c "import fideslang" + +mypy: + @$(RUN) mypy + +pylint: + @$(RUN) pylint src/ + +pytest: + @$(RUN) pytest -x + +xenon: + @$(RUN) xenon src \ + --max-absolute B \ + --max-modules B \ + --max-average A \ + --ignore "data, tests, docs" \ + --exclude "src/fideslang/_version.py" + +#################### +# Utils +#################### + +.PHONY: clean +clean: + @echo "Doing docker cleanup for this project..." + @docker compose down --remove-orphans --volumes --rmi all + @docker system prune --force + @echo "Clean complete!" + +.PHONY: teardown +teardown: + @echo "Tearing down the dev environment..." + @docker compose down --remove-orphans + @echo "Teardown complete" + +.PHONY: docs-serve +docs-serve: + @docker compose build docs + @docker compose run --rm --service-ports $(CI_ARGS) docs \ + /bin/bash -c "mkdocs serve --dev-addr=0.0.0.0:8000" diff --git a/README.md b/README.md index 3176732b..5a5049f7 100644 --- a/README.md +++ b/README.md @@ -3,20 +3,23 @@ [![License: CC BY 4.0](https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/) [![Twitter](https://img.shields.io/twitter/follow/ethyca?style=social)](https://twitter.com/ethyca) ![Fideslang banner](mkdocs/docs/img/fideslang.png "Fideslang banner") + ## Overview + Fideslang or Fides Language is a privacy taxonomy and working draft of a proposed structure to describe data and data processing behaviors as part of a typical software development process. Our hope with standarizing this definition publicly with the community is to derive an interopable standard for describe types of data and how they're being used in applications to simplify global privacy regulations. **To view the detailed taxonomy documentation, please visit [https://ethyca.github.io/fideslang/](https://ethyca.github.io/fideslang)** - + ## Summary of Taxonomy Classification Groups -The taxonomy currently comprises of four classification groups that are used together to easily describe all of the data types and associated processing behaviors of an entire tech stack; both the application processes and any data storage. +The taxonomy currently comprises of four classification groups that are used together to easily describe all of the data types and associated processing behaviors of an entire tech stack; both the application processes and any data storage. [![alt text](mkdocs/docs/img/taxonomy.png "Privacy Taxonomy Declaration")](https://ethyca.github.io/fideslang/explorer/) [Click here to view an interactive visualization of the taxonomy](https://ethyca.github.io/fideslang/explorer/) ### 1. Data Categories + Data Categories are labels to describe the type of data processed by a system. You can assign one or more data categories to a field when classifying a system. Data Categories are heirarchical with natural inheritance, meaning you can classify data coarsely with a high-level category (e.g. `user.provided` data), or you can classify it with greater precision using subclasses (e.g. `user.provided.identifiable.contact.email` data). @@ -24,13 +27,15 @@ Data Categories are heirarchical with natural inheritance, meaning you can class Learn more about [Data Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/data_categories/). ### 2. Data Use Categories -Data Use Categories are labels that describe how, or for what purpose(s) a component of your system is using data. Similar to data categories, you can assign one or multiple Data Use Categories to a system. 
+ +Data Use Categories are labels that describe how, or for what purpose(s) a component of your system is using data. Similar to data categories, you can assign one or multiple Data Use Categories to a system. Data Use Categories are also heirarchical with natural inheritance, meaning you can easily describe what you're using data for either coarsely (e.g. `provide.system.operations`) or with more precision using subclasses (e.g. `provide.system.operations.support.optimization`). Learn more about [Data Use Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/data_uses/). ### 3. Data Subject Categories + Data Subject is a label commonly used in the regulatory world to describe the users of a system who's data is being processed. In many systems a generic user label may be sufficient, however the Privacy Taxonomy is intended to provide greater control through specificity where needed. Examples of Data Subject include: Examples of this are: @@ -44,8 +49,9 @@ Examples of this are: Learn more about [Data Subject Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/data_subjects/). ### 4. Data Identification Qualifiers -Data Identification Qualifiers describe the degree of identification of the given data. -Think of this as a spectrum: on one end is completely anonymous data, i.e. it is impossible to identify an individual from it, and on the other end is data that specifically identifies an individual. + +Data Identification Qualifiers describe the degree of identification of the given data. +Think of this as a spectrum: on one end is completely anonymous data, i.e. it is impossible to identify an individual from it, and on the other end is data that specifically identifies an individual. Along this spectrum are labels that describe the degree of identification that a given data might provide, such as: @@ -56,11 +62,9 @@ Along this spectrum are labels that describe the degree of identification that a Learn more about [Data Identification Qualifiers in the taxonomy reference now](https://ethyca.github.io/fideslang/data_qualifiers/). ### Extensibility & Interopability -The taxonomy is designed to support common privacy compliance regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. + +The taxonomy is designed to support common privacy compliance regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing class structures to ensure interopability inside and outside your organization. If you have suggestions for missing classifications or concepts, please submit them for addition. - - - diff --git a/data_categories.csv b/data_files/data_categories.csv similarity index 99% rename from data_categories.csv rename to data_files/data_categories.csv index 0acf19e6..72a6437a 100644 --- a/data_categories.csv +++ b/data_files/data_categories.csv @@ -1,79 +1,79 @@ -privacy_key,name,parent_key,description -data_category,Data Category,, -account,Account Data,data_category,Data related to a system account. -account.contact,Account Contact Data,account,Contact data related to a system account. -account.contact.city,Account City,account.contact,Account's city level address data. -account.contact.country,Account Country,account.contact,Account's country level address data. -account.contact.email,Account Email,account.contact,Account's email address. 
-account.contact.phone_number,Account Phone Number,account.contact,Account's phone number. -account.contact.postal_code,Account Postal Code,account.contact,Account's postal code. -account.contact.state,Account State,account.contact,Account's state level address data. -account.contact.street,Account Street,account.contact,Account's street level address. -account.payment,Payment Data,account,Payment data related to system account. -account.payment.financial_account_number,Account Payment Financial Account Number,account.payment,"Financial account number for an account's payment card, bank account, or other financial system." -system,System Data,data_category,"Data unique to, and under control of the system." -system.authentication,Authentication Data,system,Data used to manage access to the system. -system.operations,Operations Data,system,Data used for system operations. -user,User Data,data_category,"Data related to the user of the system, either provided directly or derived based on their usage." -user.derived,Derived Data,user,Data derived from user provided data or as a result of user actions in the system. -user.derived.identifiable,Derived User Identifiable Data,user.derived,"Derived data that is linked to, or identifies a user." -user.derived.identifiable.biometric_health,Biometric Health Data,user.derived.identifiable,Encoded characteristic collected about a user. -user.derived.identifiable.browsing_history,Browsing History,user.derived.identifiable,Content browsing history of a user. -user.derived.identifiable.demographic,Demographic Data,user.derived.identifiable,Demographic data about a user. -user.derived.identifiable.contact,Derived Contact Data,user.derived.identifiable,Contact data collected about a user. -user.derived.identifiable.device,Device Data,user.derived.identifiable,"Data related to a user's device, configuration and setting." -user.derived.identifiable.device.cookie_id,Cookie ID,user.derived.identifiable.device,Cookie unique identification number. -user.derived.identifiable.device.device_id,Device ID,user.derived.identifiable.device,Device unique identification number. -user.derived.identifiable.device.ip_address,IP Address,user.derived.identifiable.device,Unique identifier related to device connection. -user.derived.identifiable.gender,Derived Gender,user.derived.identifiable,Gender of an individual. -user.derived.identifiable.location,Location Data,user.derived.identifiable,Records of the location of a user. -user.derived.identifiable.media_consumption,Media Consumption Data,user.derived.identifiable,Media type consumption data of a user. -user.derived.identifiable.non_specific_age,Derived Non-Specific Age,user.derived.identifiable,Age range data. -user.derived.identifiable.observed,Observed Data,user.derived.identifiable,Data collected through observation of use of the system. -user.derived.identifiable.profiling,Profiling Data,user.derived.identifiable,Preference and interest data about a user. -user.derived.identifiable.race,Derived Race,user.derived.identifiable,Racial or ethnic origin data. -user.derived.identifiable.religious_belief,Derived Religious Belief,user.derived.identifiable,Religion or religious belief. -user.derived.identifiable.search_history,Search History,user.derived.identifiable,Records of search history and queries of a user. -user.derived.identifiable.sexual_orientation,Derived Sexual Orientation,user.derived.identifiable,Personal sex life or sexual data. 
-user.derived.identifiable.social,Social Data,user.derived.identifiable,Social activity and interaction data. -user.derived.identifiable.telemetry,Telemetry Data,user.derived.identifiable,User identifiable measurement data from system sensors and monitoring. -user.derived.identifiable.unique_id,Unique ID,user.derived.identifiable,Unique identifier for a user assigned through system use. -user.derived.identifiable.user_sensor,User Sensor Data,user.derived.identifiable,Measurement data derived about a user's environment through system use. -user.derived.identifiable.organization,Organization Identifiable Data,user.derived.identifiable,"Derived data that is linked to, or identifies an organization." -user.derived.identifiable.workplace,Derived Workplace,user.derived.identifiable,Organization of employment. -user.derived.nonidentifiable,Derived User Non-Identifiable Data,user.derived,Non-user identifiable data derived related to a user as a result of user actions in the system. -user.derived.nonidentifiable.sensor,Sensor Data,user.derived.nonidentifiable,Non-user identifiable measurement data derived from sensors and monitoring systems. -user.provided,User Provided Data,user,Data provided or created directly by a user of the system. -user.provided.identifiable,User Provided Identifiable Data,user.provided,Data provided or created directly by a user that is linked to or identifies a user. -user.provided.identifiable.biometric,Biometric Data,user.provided.identifiable,Encoded characteristics provided by a user. -user.provided.identifiable.childrens,Children's Data,user.provided.identifiable,Data relating to children. -user.provided.identifiable.contact,Provided Contact Data,user.provided.identifiable,User provided contact data for purposes other than account management. -user.provided.identifiable.contact.city,User Provided City,user.provided.identifiable.contact,User's city level address data. -user.provided.identifiable.contact.country,User Provided Country,user.provided.identifiable.contact,User's country level address data. -user.provided.identifiable.contact.email,User Provided Email,user.provided.identifiable.contact,User's provided email address. -user.provided.identifiable.contact.phone_number,User Provided Phone Number,user.provided.identifiable.contact,User's phone number. -user.provided.identifiable.contact.postal_code,User Provided Postal Code,user.provided.identifiable.contact,User's postal code. -user.provided.identifiable.contact.state,User Provided State,user.provided.identifiable.contact,User's state level address data. -user.provided.identifiable.contact.street,User Provided Street,user.provided.identifiable.contact,User's street level address data. -user.provided.identifiable.credentials,Credentials,user.provided.identifiable,User provided authentication data. -user.provided.identifiable.credentials.biometric_credentials,Biometric Credentials,user.provided.identifiable.credentials,Credentials for system authentication. -user.provided.identifiable.credentials.password,Password,user.provided.identifiable.credentials,Password for system authentication. -user.provided.identifiable.date_of_birth,Date of Birth,user.provided.identifiable,User's date of birth. -user.provided.identifiable.financial,Financial Data,user.provided.identifiable,Payment data and financial history. -user.provided.identifiable.financial.account_number,User Provided Financial Account Number,user.provided.identifiable.financial,"User's account number for a payment card, bank account, or other financial system." 
-user.provided.identifiable.gender,User Provided Gender,user.provided.identifiable,Gender of an individual. -user.provided.identifiable.genetic,Genetic Data,user.provided.identifiable,Data about the genetic makeup provided by a user. -user.provided.identifiable.government_id,Government ID,user.provided.identifiable,State provided identification data. -user.provided.identifiable.government_id.drivers_license_number,Driver's License Number,user.provided.identifiable.government_id,State issued driving identification number. -user.provided.identifiable.government_id.national_identification_number,National Identification Number,user.provided.identifiable.government_id,State issued personal identification number. -user.provided.identifiable.government_id.passport_number,Passport Number,user.provided.identifiable.government_id,State issued passport data. -user.provided.identifiable.health_and_medical,Health and Medical Data,user.provided.identifiable,Health records or individual's personal medical information. -user.provided.identifiable.job_title,Job Title,user.provided.identifiable,Professional data. -user.provided.identifiable.name,Name,user.provided.identifiable,User's real name. -user.provided.identifiable.non_specific_age,User Provided Non-Specific Age,user.provided.identifiable,Age range data. -user.provided.identifiable.political_opinion,Political Opinion,user.provided.identifiable,Data related to the individual's political opinions. -user.provided.identifiable.race,User Provided Race,user.provided.identifiable,Racial or ethnic origin data. -user.provided.identifiable.religious_belief,User Provided Religious Belief,user.provided.identifiable,Religion or religious belief. -user.provided.identifiable.sexual_orientation,User Provided Sexual Orientation,user.provided.identifiable,Personal sex life or sexual data. -user.provided.identifiable.workplace,User Provided Workplace,user.provided.identifiable,Organization of employment. -user.provided.nonidentifiable,User Provided Non-Identifiable Data,user.provided,Data provided or created directly by a user that is not identifiable. +privacy_key,name,parent_key,description +data_category,Data Category,, +account,Account Data,data_category,Data related to a system account. +account.contact,Account Contact Data,account,Contact data related to a system account. +account.contact.city,Account City,account.contact,Account's city level address data. +account.contact.country,Account Country,account.contact,Account's country level address data. +account.contact.email,Account Email,account.contact,Account's email address. +account.contact.phone_number,Account Phone Number,account.contact,Account's phone number. +account.contact.postal_code,Account Postal Code,account.contact,Account's postal code. +account.contact.state,Account State,account.contact,Account's state level address data. +account.contact.street,Account Street,account.contact,Account's street level address. +account.payment,Payment Data,account,Payment data related to system account. +account.payment.financial_account_number,Account Payment Financial Account Number,account.payment,"Financial account number for an account's payment card, bank account, or other financial system." +system,System Data,data_category,"Data unique to, and under control of the system." +system.authentication,Authentication Data,system,Data used to manage access to the system. +system.operations,Operations Data,system,Data used for system operations. 
+user,User Data,data_category,"Data related to the user of the system, either provided directly or derived based on their usage." +user.derived,Derived Data,user,Data derived from user provided data or as a result of user actions in the system. +user.derived.identifiable,Derived User Identifiable Data,user.derived,"Derived data that is linked to, or identifies a user." +user.derived.identifiable.biometric_health,Biometric Health Data,user.derived.identifiable,Encoded characteristic collected about a user. +user.derived.identifiable.browsing_history,Browsing History,user.derived.identifiable,Content browsing history of a user. +user.derived.identifiable.demographic,Demographic Data,user.derived.identifiable,Demographic data about a user. +user.derived.identifiable.contact,Derived Contact Data,user.derived.identifiable,Contact data collected about a user. +user.derived.identifiable.device,Device Data,user.derived.identifiable,"Data related to a user's device, configuration and setting." +user.derived.identifiable.device.cookie_id,Cookie ID,user.derived.identifiable.device,Cookie unique identification number. +user.derived.identifiable.device.device_id,Device ID,user.derived.identifiable.device,Device unique identification number. +user.derived.identifiable.device.ip_address,IP Address,user.derived.identifiable.device,Unique identifier related to device connection. +user.derived.identifiable.gender,Derived Gender,user.derived.identifiable,Gender of an individual. +user.derived.identifiable.location,Location Data,user.derived.identifiable,Records of the location of a user. +user.derived.identifiable.media_consumption,Media Consumption Data,user.derived.identifiable,Media type consumption data of a user. +user.derived.identifiable.non_specific_age,Derived Non-Specific Age,user.derived.identifiable,Age range data. +user.derived.identifiable.observed,Observed Data,user.derived.identifiable,Data collected through observation of use of the system. +user.derived.identifiable.profiling,Profiling Data,user.derived.identifiable,Preference and interest data about a user. +user.derived.identifiable.race,Derived Race,user.derived.identifiable,Racial or ethnic origin data. +user.derived.identifiable.religious_belief,Derived Religious Belief,user.derived.identifiable,Religion or religious belief. +user.derived.identifiable.search_history,Search History,user.derived.identifiable,Records of search history and queries of a user. +user.derived.identifiable.sexual_orientation,Derived Sexual Orientation,user.derived.identifiable,Personal sex life or sexual data. +user.derived.identifiable.social,Social Data,user.derived.identifiable,Social activity and interaction data. +user.derived.identifiable.telemetry,Telemetry Data,user.derived.identifiable,User identifiable measurement data from system sensors and monitoring. +user.derived.identifiable.unique_id,Unique ID,user.derived.identifiable,Unique identifier for a user assigned through system use. +user.derived.identifiable.user_sensor,User Sensor Data,user.derived.identifiable,Measurement data derived about a user's environment through system use. +user.derived.identifiable.organization,Organization Identifiable Data,user.derived.identifiable,"Derived data that is linked to, or identifies an organization." +user.derived.identifiable.workplace,Derived Workplace,user.derived.identifiable,Organization of employment. 
+user.derived.nonidentifiable,Derived User Non-Identifiable Data,user.derived,Non-user identifiable data derived related to a user as a result of user actions in the system. +user.derived.nonidentifiable.sensor,Sensor Data,user.derived.nonidentifiable,Non-user identifiable measurement data derived from sensors and monitoring systems. +user.provided,User Provided Data,user,Data provided or created directly by a user of the system. +user.provided.identifiable,User Provided Identifiable Data,user.provided,Data provided or created directly by a user that is linked to or identifies a user. +user.provided.identifiable.biometric,Biometric Data,user.provided.identifiable,Encoded characteristics provided by a user. +user.provided.identifiable.childrens,Children's Data,user.provided.identifiable,Data relating to children. +user.provided.identifiable.contact,Provided Contact Data,user.provided.identifiable,User provided contact data for purposes other than account management. +user.provided.identifiable.contact.city,User Provided City,user.provided.identifiable.contact,User's city level address data. +user.provided.identifiable.contact.country,User Provided Country,user.provided.identifiable.contact,User's country level address data. +user.provided.identifiable.contact.email,User Provided Email,user.provided.identifiable.contact,User's provided email address. +user.provided.identifiable.contact.phone_number,User Provided Phone Number,user.provided.identifiable.contact,User's phone number. +user.provided.identifiable.contact.postal_code,User Provided Postal Code,user.provided.identifiable.contact,User's postal code. +user.provided.identifiable.contact.state,User Provided State,user.provided.identifiable.contact,User's state level address data. +user.provided.identifiable.contact.street,User Provided Street,user.provided.identifiable.contact,User's street level address data. +user.provided.identifiable.credentials,Credentials,user.provided.identifiable,User provided authentication data. +user.provided.identifiable.credentials.biometric_credentials,Biometric Credentials,user.provided.identifiable.credentials,Credentials for system authentication. +user.provided.identifiable.credentials.password,Password,user.provided.identifiable.credentials,Password for system authentication. +user.provided.identifiable.date_of_birth,Date of Birth,user.provided.identifiable,User's date of birth. +user.provided.identifiable.financial,Financial Data,user.provided.identifiable,Payment data and financial history. +user.provided.identifiable.financial.account_number,User Provided Financial Account Number,user.provided.identifiable.financial,"User's account number for a payment card, bank account, or other financial system." +user.provided.identifiable.gender,User Provided Gender,user.provided.identifiable,Gender of an individual. +user.provided.identifiable.genetic,Genetic Data,user.provided.identifiable,Data about the genetic makeup provided by a user. +user.provided.identifiable.government_id,Government ID,user.provided.identifiable,State provided identification data. +user.provided.identifiable.government_id.drivers_license_number,Driver's License Number,user.provided.identifiable.government_id,State issued driving identification number. +user.provided.identifiable.government_id.national_identification_number,National Identification Number,user.provided.identifiable.government_id,State issued personal identification number. 
+user.provided.identifiable.government_id.passport_number,Passport Number,user.provided.identifiable.government_id,State issued passport data. +user.provided.identifiable.health_and_medical,Health and Medical Data,user.provided.identifiable,Health records or individual's personal medical information. +user.provided.identifiable.job_title,Job Title,user.provided.identifiable,Professional data. +user.provided.identifiable.name,Name,user.provided.identifiable,User's real name. +user.provided.identifiable.non_specific_age,User Provided Non-Specific Age,user.provided.identifiable,Age range data. +user.provided.identifiable.political_opinion,Political Opinion,user.provided.identifiable,Data related to the individual's political opinions. +user.provided.identifiable.race,User Provided Race,user.provided.identifiable,Racial or ethnic origin data. +user.provided.identifiable.religious_belief,User Provided Religious Belief,user.provided.identifiable,Religion or religious belief. +user.provided.identifiable.sexual_orientation,User Provided Sexual Orientation,user.provided.identifiable,Personal sex life or sexual data. +user.provided.identifiable.workplace,User Provided Workplace,user.provided.identifiable,Organization of employment. +user.provided.nonidentifiable,User Provided Non-Identifiable Data,user.provided,Data provided or created directly by a user that is not identifiable. diff --git a/data_categories.json b/data_files/data_categories.json similarity index 100% rename from data_categories.json rename to data_files/data_categories.json diff --git a/data_categories.yml b/data_files/data_categories.yml similarity index 100% rename from data_categories.yml rename to data_files/data_categories.yml diff --git a/data_qualifiers.csv b/data_files/data_qualifiers.csv similarity index 99% rename from data_qualifiers.csv rename to data_files/data_qualifiers.csv index 9ddcde96..eac50817 100644 --- a/data_qualifiers.csv +++ b/data_files/data_qualifiers.csv @@ -1,7 +1,7 @@ -privacy_key,name,parent_key,description -data_qualifier,Data Qualifier,, -aggregated,Aggregated Data,data_qualifier,Statistical data that does not contain individually identifying information but includes information about groups of individuals that renders individual identification impossible. -aggregated.anonymized,Anonymized Data,aggregated,Data where all attributes have been sufficiently altered that the individaul cannot be reidentified by this data or in combination with other datasets. -aggregated.anonymized.unlinked_pseudonymized,Unlinked Pseudonymized Data,aggregated.anonymized,"Data for which all identifiers have been substituted with unrelated values and linkages broken such that it may not be reversed, even by the party that performed the pseudonymization." -aggregated.anonymized.unlinked_pseudonymized.pseudonymized,Pseudonymized Data,aggregated.anonymized.unlinked_pseudonymized,"Data for which all identifiers have been substituted with unrelated values, rendering the individual unidentifiable and cannot be reasonably reversed other than by the party that performed the pseudonymization." +privacy_key,name,parent_key,description +data_qualifier,Data Qualifier,, +aggregated,Aggregated Data,data_qualifier,Statistical data that does not contain individually identifying information but includes information about groups of individuals that renders individual identification impossible. 
+aggregated.anonymized,Anonymized Data,aggregated,Data where all attributes have been sufficiently altered that the individaul cannot be reidentified by this data or in combination with other datasets. +aggregated.anonymized.unlinked_pseudonymized,Unlinked Pseudonymized Data,aggregated.anonymized,"Data for which all identifiers have been substituted with unrelated values and linkages broken such that it may not be reversed, even by the party that performed the pseudonymization." +aggregated.anonymized.unlinked_pseudonymized.pseudonymized,Pseudonymized Data,aggregated.anonymized.unlinked_pseudonymized,"Data for which all identifiers have been substituted with unrelated values, rendering the individual unidentifiable and cannot be reasonably reversed other than by the party that performed the pseudonymization." aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified,Identified Data,aggregated.anonymized.unlinked_pseudonymized.pseudonymized,Data that directly identifies an individual. \ No newline at end of file diff --git a/data_qualifiers.json b/data_files/data_qualifiers.json similarity index 100% rename from data_qualifiers.json rename to data_files/data_qualifiers.json diff --git a/data_qualifiers.yml b/data_files/data_qualifiers.yml similarity index 100% rename from data_qualifiers.yml rename to data_files/data_qualifiers.yml diff --git a/data_subjects.csv b/data_files/data_subjects.csv similarity index 99% rename from data_subjects.csv rename to data_files/data_subjects.csv index 259f38fe..de890d79 100644 --- a/data_subjects.csv +++ b/data_files/data_subjects.csv @@ -1,17 +1,17 @@ -privacy_key,name,parent_key,description -data_subject,Data Subject,, -anonymous_user,Anonymous User,data_subject,An individual that is unidentifiable to the systems. Note - This should only be applied to truly anonymous users where there is no risk of re-identification -citizen_voter,Citizen Voter,data_subject,An individual registered to voter with a state or authority. -commuter,Commuter,data_subject,An individual that is traveling or transiting in the context of location tracking. -consultant,Consultant,data_subject,An individual employed in a consultative/temporary capacity by the organization. -customer,Custom,data_subject,An individual or other organization that purchases goods or services from the organization. -employee,Employee,data_subject,An individual employed by the organization. -job_applicant,Job Applicant,data_subject,An individual applying for employment to the organization. -next_of_kin,Next of Kin,data_subject,A relative of any other individual subject where such a relationship is known. -passenger,Passenger,data_subject,An individual traveling on some means of provided transport. -patient,Patient,data_subject,An individual identified for the purposes of any medical care. -prospect,Prospect,data_subject,An individual or organization to whom an organization is selling goods or services. -shareholder,Shareholder,data_subject,An individual or organization that holds equity in the organization. -supplier_vendor,Supplier/Vendor,data_subject,An individual or organization that provides services or goods to the organization. -trainee,Trainee,data_subject,An individual undergoing training by the organization. -visitor,Visitor,data_subject,An individual visiting a location. +privacy_key,name,parent_key,description +data_subject,Data Subject,, +anonymous_user,Anonymous User,data_subject,An individual that is unidentifiable to the systems. 
Note - This should only be applied to truly anonymous users where there is no risk of re-identification +citizen_voter,Citizen Voter,data_subject,An individual registered to voter with a state or authority. +commuter,Commuter,data_subject,An individual that is traveling or transiting in the context of location tracking. +consultant,Consultant,data_subject,An individual employed in a consultative/temporary capacity by the organization. +customer,Custom,data_subject,An individual or other organization that purchases goods or services from the organization. +employee,Employee,data_subject,An individual employed by the organization. +job_applicant,Job Applicant,data_subject,An individual applying for employment to the organization. +next_of_kin,Next of Kin,data_subject,A relative of any other individual subject where such a relationship is known. +passenger,Passenger,data_subject,An individual traveling on some means of provided transport. +patient,Patient,data_subject,An individual identified for the purposes of any medical care. +prospect,Prospect,data_subject,An individual or organization to whom an organization is selling goods or services. +shareholder,Shareholder,data_subject,An individual or organization that holds equity in the organization. +supplier_vendor,Supplier/Vendor,data_subject,An individual or organization that provides services or goods to the organization. +trainee,Trainee,data_subject,An individual undergoing training by the organization. +visitor,Visitor,data_subject,An individual visiting a location. diff --git a/data_subjects.json b/data_files/data_subjects.json similarity index 100% rename from data_subjects.json rename to data_files/data_subjects.json diff --git a/data_subjects.yml b/data_files/data_subjects.yml similarity index 100% rename from data_subjects.yml rename to data_files/data_subjects.yml diff --git a/data_uses.csv b/data_files/data_uses.csv similarity index 99% rename from data_uses.csv rename to data_files/data_uses.csv index d8e5d9a4..29e20a3a 100644 --- a/data_uses.csv +++ b/data_files/data_uses.csv @@ -1,25 +1,25 @@ -fides_key,name,parent_key,description -data_use,Data Use,, -provide,Provide the capability,data_use,"Provide, give, or make available the product, service, application or system." -provide.system,System,provide,"The source system, product, service or application being provided to the user." -provide.system.operations,System Operations,provide.system,Use of specified data categories to operate and protect the system in order to provide the service. -provide.system.operations.support,Operations Support,provide.system.operations,Use of specified data categories to provide support for operation and protection of the system in order to provide the service. -provide.system.operations.support.optimization,Support Optimization,provide.system.operations.support,Use of specified data categories to optimize and improve support operations in order to provide the service. -provide.system.upgrades,Offer Upgrades,provide.system,Offer upgrades or upsales such as increased capacity for the service based on monitoring of service usage. -improve,Improve the capability,data_use,"Improve the product, service, application or system." -improve.system,System,improve,"The source system, product, service or application being improved." -personalize,Personalize the capability,data_use,"Personalize the product, service, application or system." -personalize.system,System,personalize,"The source system, product, service or application being personalized." 
-advertising,"Advertising, Marketing or Promotion",data_use,The promotion of products or services targeted to users based on the the processing of user provided data in the system. -advertising.first_party,First Party Advertising,advertising,The promotion of products or services targeting users based on processing of derviced data from prior use of the system. -advertising.third_party,Third Party Advertising,advertising,The promotion of products or services targeting users based on processing of specific categories of data acquired from third party sources. -advertising.first_party.contextual,First Party Contextual Advertising,advertising.first_party,The promotion of products or services targeted to users based on the processing of derived data from the users prior use of the services. -advertising.first_party.personalized,First Party Personalized Advertising,advertising.first_party,The targeting and changing of promotional content based on processing of specific data categories from the user. -advertising.third_party.personalized,Third Party Personalized Advertising,advertising.third_party,The targeting and changing of promotional content based on processing of specific categories of user data acquired from third party sources. -third_party_sharing,Third Party Sharing,data_use,The transfer of specified data categories to third parties outside of the system/application's scope. -third_party_sharing.payment_processing,Sharing for Processing Payments,third_party_sharing,Sharing of specified data categories with a third party for payment processing. -third_party_sharing.personalized_advertising,Sharing for Personalized Advertising,third_party_sharing,Sharing of specified data categories for the purpose of marketing/advertising/promotion. -third_party_sharing.fraud_detection,Sharing for Fraud Detection,third_party_sharing,Sharing of specified data categories with a third party fo fraud prevention/detection. -third_party_sharing.legal_obligation,Sharing for Legal Obligation,third_party_sharing,"Sharing of data for legal obligations, including contracts, applicable laws or regulations." -collect,Collect,data_use,Collecting and storing data in order to use it for another purpose such as data training for ML. -train_ai_system,Train AI System,data_use,"Training an AI system. Please note when this data use is specified, the method and degree to which a user may be directly identified in the resulting AI system should be appended." +fides_key,name,parent_key,description +data_use,Data Use,, +provide,Provide the capability,data_use,"Provide, give, or make available the product, service, application or system." +provide.system,System,provide,"The source system, product, service or application being provided to the user." +provide.system.operations,System Operations,provide.system,Use of specified data categories to operate and protect the system in order to provide the service. +provide.system.operations.support,Operations Support,provide.system.operations,Use of specified data categories to provide support for operation and protection of the system in order to provide the service. +provide.system.operations.support.optimization,Support Optimization,provide.system.operations.support,Use of specified data categories to optimize and improve support operations in order to provide the service. +provide.system.upgrades,Offer Upgrades,provide.system,Offer upgrades or upsales such as increased capacity for the service based on monitoring of service usage. 
+improve,Improve the capability,data_use,"Improve the product, service, application or system."
+improve.system,System,improve,"The source system, product, service or application being improved."
+personalize,Personalize the capability,data_use,"Personalize the product, service, application or system."
+personalize.system,System,personalize,"The source system, product, service or application being personalized."
+advertising,"Advertising, Marketing or Promotion",data_use,The promotion of products or services targeted to users based on the processing of user provided data in the system.
+advertising.first_party,First Party Advertising,advertising,The promotion of products or services targeting users based on processing of derived data from prior use of the system.
+advertising.third_party,Third Party Advertising,advertising,The promotion of products or services targeting users based on processing of specific categories of data acquired from third party sources.
+advertising.first_party.contextual,First Party Contextual Advertising,advertising.first_party,The promotion of products or services targeted to users based on the processing of derived data from the user's prior use of the services.
+advertising.first_party.personalized,First Party Personalized Advertising,advertising.first_party,The targeting and changing of promotional content based on processing of specific data categories from the user.
+advertising.third_party.personalized,Third Party Personalized Advertising,advertising.third_party,The targeting and changing of promotional content based on processing of specific categories of user data acquired from third party sources.
+third_party_sharing,Third Party Sharing,data_use,The transfer of specified data categories to third parties outside of the system/application's scope.
+third_party_sharing.payment_processing,Sharing for Processing Payments,third_party_sharing,Sharing of specified data categories with a third party for payment processing.
+third_party_sharing.personalized_advertising,Sharing for Personalized Advertising,third_party_sharing,Sharing of specified data categories for the purpose of marketing/advertising/promotion.
+third_party_sharing.fraud_detection,Sharing for Fraud Detection,third_party_sharing,Sharing of specified data categories with a third party for fraud prevention/detection.
+third_party_sharing.legal_obligation,Sharing for Legal Obligation,third_party_sharing,"Sharing of data for legal obligations, including contracts, applicable laws or regulations."
+collect,Collect,data_use,Collecting and storing data in order to use it for another purpose such as data training for ML.
+train_ai_system,Train AI System,data_use,"Training an AI system. Please note when this data use is specified, the method and degree to which a user may be directly identified in the resulting AI system should be appended."
diff --git a/data_uses.json b/data_files/data_uses.json
similarity index 100%
rename from data_uses.json
rename to data_files/data_uses.json
diff --git a/data_uses.yml b/data_files/data_uses.yml
similarity index 100%
rename from data_uses.yml
rename to data_files/data_uses.yml
diff --git a/demo_resources/demo_dataset.yml b/demo_resources/demo_dataset.yml
new file mode 100644
index 00000000..b5af3829
--- /dev/null
+++ b/demo_resources/demo_dataset.yml
@@ -0,0 +1,51 @@
+dataset:
+- fides_key: demo_users_dataset
+  organization_fides_key: default_organization
+  name: Demo Users Dataset
+  description: Data collected about users for our analytics system.
+ meta: null + data_categories: [] + data_qualifiers: + - aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: "30 days after account deletion" + third_country_transfers: + - GBR + - CAN + collections: + - name: users + description: User information + data_categories: [] + data_qualifiers: + - aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + fields: + - name: created_at + description: User's creation timestamp + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + - name: email + description: User's Email + data_categories: + - user.provided.identifiable.contact.email + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: Account termination + - name: first_name + description: User's first name + data_categories: + - user.provided.identifiable.name + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: Account termination + - name: food_preference + description: User's favorite food + data_categories: [] + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + - name: state + description: User's State + data_categories: + - user.provided.identifiable.contact.state + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + - name: uuid + description: User's unique ID + data_categories: + - user.derived.identifiable.unique_id + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/demo_resources/demo_extended_taxonomy.yml b/demo_resources/demo_extended_taxonomy.yml new file mode 100644 index 00000000..b75af01d --- /dev/null +++ b/demo_resources/demo_extended_taxonomy.yml @@ -0,0 +1,24 @@ +data_use: + - fides_key: third_party_sharing.personalized_advertising.direct_marketing + name: Direct Marketing + description: User information for direct marketing purposes + recipients: + - Processor - marketing co. + legal_basis: Legitimate Interests + special_category: Vital Interests + legitimate_interest_impact_assessment: https://example.org/legitimate_interest_assessment + parent_key: third_party_sharing.personalized_advertising + +data_subject: + - fides_key: potential_customer + name: Potential Customer + description: A prospective individual or other organization that purchases goods or services from the organization. 
+ rights: + strategy: INCLUDE + values: + - Informed + - Access + - Rectification + - Erasure + - Object + automated_decisions_or_profiling: true diff --git a/demo_resources/demo_organization.yml b/demo_resources/demo_organization.yml new file mode 100644 index 00000000..8ce3b6e9 --- /dev/null +++ b/demo_resources/demo_organization.yml @@ -0,0 +1,22 @@ +organization: + - fides_key: default_organization + name: Demo Organization + description: An e-commerce organization + security_policy: https://ethyca.com/privacy-policy/ + controller: + name: Con Troller + address: 123 demo street, New York, NY, USA + email: controller@demo_company.com + phone: +1 555 555 5555 + data_protection_officer: + name: DataPro Tection + address: 123 demo street, New York, NY, USA + email: dpo@demo_company.com + phone: +1 555 555 5555 + representative: + name: Rep Resentative + address: 123 demo street, New York, NY, USA + email: representative@demo_company.com + phone: +1 555 555 5555 + + diff --git a/demo_resources/demo_policy.yml b/demo_resources/demo_policy.yml new file mode 100644 index 00000000..38b8bb67 --- /dev/null +++ b/demo_resources/demo_policy.yml @@ -0,0 +1,20 @@ +policy: + - fides_key: demo_privacy_policy + name: Demo Privacy Policy + description: The main privacy policy for the organization. + rules: + - name: Reject Direct Marketing + description: Disallow collecting any user contact info to use for marketing. + data_categories: + matches: ANY + values: + - user.provided.identifiable.contact + data_uses: + matches: ANY + values: + - advertising + data_subjects: + matches: ANY + values: + - customer + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/demo_resources/demo_registry.yml b/demo_resources/demo_registry.yml new file mode 100644 index 00000000..7d4658e0 --- /dev/null +++ b/demo_resources/demo_registry.yml @@ -0,0 +1,4 @@ +registry: + - fides_key: demo_registry + name: Demo Registry + description: Demo Registry diff --git a/demo_resources/demo_system.yml b/demo_resources/demo_system.yml new file mode 100644 index 00000000..a4c257c6 --- /dev/null +++ b/demo_resources/demo_system.yml @@ -0,0 +1,40 @@ +system: + - fides_key: demo_analytics_system + name: Demo Analytics System + description: A system used for analyzing customer behaviour. + system_type: Service + administrating_department: Engineering + data_responsibility_title: Controller + third_country_transfers: + - USA + - CAN + data_protection_impact_assessment: + is_required: True + progress: Complete + link: https://example.org/analytics_system_data_protection_impact_assessment + privacy_declarations: + - name: Analyze customer behaviour for improvements. + data_categories: + - user.provided.identifiable.contact + - user.derived.identifiable.device.cookie_id + data_use: improve.system + data_subjects: + - customer + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + dataset_references: + - demo_users_dataset + + - fides_key: demo_marketing_system + name: Demo Marketing System + description: Collect data about our users for marketing. 
+ system_type: Service + data_responsibility_title: Processor + privacy_declarations: + - name: Collect data for marketing + data_categories: + #- user.provided.identifiable.contact # uncomment to add this category to the system + - user.derived.identifiable.device.cookie_id + data_use: advertising + data_subjects: + - customer + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 00000000..43549c1e --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,11 @@ +black==22.3 +ipython +mypy==0.910 +packaging==20.9 +pre-commit==2.9.3 +pylint==2.6.0 +pytest==6.2.2 +pytest-cov==2.11.1 +requests-mock==1.8.0 +types-PyYAML +xenon==0.7.3 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..cf4a2370 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,19 @@ +services: + fideslang: + image: ethyca/fideslang:local + command: /bin/bash + volumes: + - type: bind + source: . + target: /fideslang + read_only: False + + docs: + build: + context: mkdocs/ + volumes: + - ./mkdocs:/docs + expose: + - 8000 + ports: + - "8000:8000" diff --git a/mkdocs/Dockerfile b/mkdocs/Dockerfile new file mode 100644 index 00000000..c8630a5a --- /dev/null +++ b/mkdocs/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.8-slim-buster + +# Install auxiliary software +RUN apt-get update +RUN apt-get install -y \ + git \ + vim + +# Update pip and install requirements +RUN pip install -U pip +COPY requirements.txt requirements.txt +RUN pip install -r requirements.txt + +# Copy in the required files +COPY . /docs +WORKDIR /docs + +EXPOSE 8000 +CMD ["mkdocs", "serve", "--dev-addr=0.0.0.0:8000"] diff --git a/optional-requirements.txt b/optional-requirements.txt new file mode 100644 index 00000000..eb6d6ff3 --- /dev/null +++ b/optional-requirements.txt @@ -0,0 +1 @@ +fastapi>=0.68 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..f334e561 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,80 @@ +[build-system] +requires = ["setuptools", "wheel", "versioneer-518"] # PEP 508 specifications. + +###### +# MyPy +###### +# [tool.mypy] Waiting for new version of Mypy +# warn_unused_configs = true +# ignore_missing_imports = true +# pretty = true + +####### +# Black +####### +[tool.black] +py39 = true +line-length = 88 +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + + # The following are specific to Black, you probably don't want those. 
+ | blib2to3 + | tests/data +)/ +''' + +######## +# Pylint +######## +[tool.pylint.messages_control] +ignore="migrations" +disable=[ + "line-too-long", + "too-few-public-methods", + "duplicate-code", + "import-error", + "unused-argument", + "no-self-use", + "import-outside-toplevel", + "unsubscriptable-object", # Otherwise throws errors on certain Type annotations + "too-many-arguments", + "missing-module-docstring", + "raise-missing-from", + "fixme"] +extension-pkg-whitelist = "pydantic" + +[tool.pylint.reports] +reports="no" +output-format="colorized" + +[tool.pylint.format] +max-line-length="88" + +[tool.pylint.basic] +good-names="_,i,setUp,tearDown,maxDiff,default_app_config" + +######## +# Pytest +######## +[tool.pytest.ini_options] +testpaths="tests" +log_level = "DEBUG" +addopts = ["--cov=fideslang", + "--cov-report=term-missing", + "-vv", + "--no-cov-on-fail", + "--disable-pytest-warnings"] +markers = [ + "unit: only runs tests that don't require non-python dependencies (i.e. a database)", +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..7e5c6712 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pydantic>=1.8.1,<2.0.0 +pyyaml>=5,<6 +versioneer==0.19 diff --git a/scripts/dev-requirements.txt b/scripts/dev-requirements.txt deleted file mode 100644 index 932bd69e..00000000 --- a/scripts/dev-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -PyYAML==5.4.1 diff --git a/scripts/import_from_fidesctl.sh b/scripts/import_from_fidesctl.sh deleted file mode 100755 index e5f6c602..00000000 --- a/scripts/import_from_fidesctl.sh +++ /dev/null @@ -1,4 +0,0 @@ -#! /bin/zsh -# Copy over the source taxonomy files from fidesctl project - -cp ~/git/fides/fidesctl/default_taxonomy/*.yml ../ diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..52137b12 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,31 @@ +[versioneer] +VCS = git +style = pep440 +versionfile_source = src/fideslang/_version.py +versionfile_build = fideslang/_version.py +tag_prefix = +parentdir_prefix = + +[mypy] +show_error_codes = True +check_untyped_defs = True +disallow_any_explicit = True +disallow_untyped_defs = True +files = src/ +exclude = migrations/ +ignore_missing_imports = True +no_implicit_reexport = True +pretty = True +plugins = pydantic.mypy +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True + +[pydantic-mypy] +init_forbid_extra = True +init_typed = True +warn_required_dynamic_aliases = True +warn_untyped_fields = True + +[mypy-src.fideslang._version] +ignore_errors = True diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..3e7202ea --- /dev/null +++ b/setup.py @@ -0,0 +1,44 @@ +import pathlib +from setuptools import setup, find_packages +import versioneer + +here = pathlib.Path(__file__).parent.resolve() +long_description = open("README.md").read() + +# Requirements + +install_requires = open("requirements.txt").read().strip().split("\n") +dev_requires = open("dev-requirements.txt").read().strip().split("\n") + +# Human-Readable/Reusable Extras +# Add these to `optional-requirements.txt` as well +fastapi = "fastapi==0.68" + +extras = {"fastapi": [fastapi]} +extras["all"] = sum(extras.values(), []) + +setup( + name="fideslang", + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + description="Fides Taxonomy Language", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/ethyca/fideslang", + python_requires=">=3.7, <4", + 
packages=find_packages(where="src"), + package_dir={"": "src"}, + author="Ethyca, Inc.", + author_email="fidesteam@ethyca.com", + license="Apache License 2.0", + install_requires=install_requires, + dev_requires=dev_requires, + extras_require=extras, + classifiers=[ + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Topic :: Software Development :: Libraries", + ], +) diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/fideslang/__init__.py b/src/fideslang/__init__.py new file mode 100644 index 00000000..e8734d1d --- /dev/null +++ b/src/fideslang/__init__.py @@ -0,0 +1,42 @@ +""" +Exports various fideslang objects for easier use elsewhere. +""" + +from typing import Dict, Type, Union + +# Export the Models +from .models import ( + DataCategory, + DataQualifier, + DataSubject, + DataUse, + Dataset, + DatasetField, + Evaluation, + FidesModel, + Organization, + Policy, + PolicyRule, + PrivacyRule, + Registry, + PrivacyDeclaration, + System, + Taxonomy, +) +from .default_taxonomy import DEFAULT_TAXONOMY +from .default_fixtures import COUNTRY_CODES + +FidesModelType = Union[Type[FidesModel], Type[Evaluation]] +model_map: Dict[str, FidesModelType] = { + "data_category": DataCategory, + "data_qualifier": DataQualifier, + "data_subject": DataSubject, + "data_use": DataUse, + "dataset": Dataset, + "organization": Organization, + "policy": Policy, + "registry": Registry, + "system": System, + "evaluation": Evaluation, +} +model_list = list(model_map.keys()) diff --git a/src/fideslang/_version.py b/src/fideslang/_version.py new file mode 100644 index 00000000..603a4b2c --- /dev/null +++ b/src/fideslang/_version.py @@ -0,0 +1,562 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" +# pylint: skip-file + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "src/fideslang/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r"\d", r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/src/fideslang/default_fixtures.py b/src/fideslang/default_fixtures.py new file mode 100644 index 00000000..9838a83e --- /dev/null +++ b/src/fideslang/default_fixtures.py @@ -0,0 +1,267 @@ +""" +This is a slowly changing dataset built from a public endpoint +and stored here as a constant to reduce non-value added api calls. + +https://restcountries.com/v2/all?fields=name,alpha3Code + +To update: +1. Make a GET request to the above url +2. 
Copy/Paste the JSON response below +""" + + +COUNTRY_CODES = [ + {"name": "Afghanistan", "alpha3Code": "AFG"}, + {"name": "Åland Islands", "alpha3Code": "ALA"}, + {"name": "Albania", "alpha3Code": "ALB"}, + {"name": "Algeria", "alpha3Code": "DZA"}, + {"name": "American Samoa", "alpha3Code": "ASM"}, + {"name": "Andorra", "alpha3Code": "AND"}, + {"name": "Angola", "alpha3Code": "AGO"}, + {"name": "Anguilla", "alpha3Code": "AIA"}, + {"name": "Antarctica", "alpha3Code": "ATA"}, + {"name": "Antigua and Barbuda", "alpha3Code": "ATG"}, + {"name": "Argentina", "alpha3Code": "ARG"}, + {"name": "Armenia", "alpha3Code": "ARM"}, + {"name": "Aruba", "alpha3Code": "ABW"}, + {"name": "Australia", "alpha3Code": "AUS"}, + {"name": "Austria", "alpha3Code": "AUT"}, + {"name": "Azerbaijan", "alpha3Code": "AZE"}, + {"name": "Bahamas", "alpha3Code": "BHS"}, + {"name": "Bahrain", "alpha3Code": "BHR"}, + {"name": "Bangladesh", "alpha3Code": "BGD"}, + {"name": "Barbados", "alpha3Code": "BRB"}, + {"name": "Belarus", "alpha3Code": "BLR"}, + {"name": "Belgium", "alpha3Code": "BEL"}, + {"name": "Belize", "alpha3Code": "BLZ"}, + {"name": "Benin", "alpha3Code": "BEN"}, + {"name": "Bermuda", "alpha3Code": "BMU"}, + {"name": "Bhutan", "alpha3Code": "BTN"}, + {"name": "Bolivia (Plurinational State of)", "alpha3Code": "BOL"}, + {"name": "Bonaire, Sint Eustatius and Saba", "alpha3Code": "BES"}, + {"name": "Bosnia and Herzegovina", "alpha3Code": "BIH"}, + {"name": "Botswana", "alpha3Code": "BWA"}, + {"name": "Bouvet Island", "alpha3Code": "BVT"}, + {"name": "Brazil", "alpha3Code": "BRA"}, + {"name": "British Indian Ocean Territory", "alpha3Code": "IOT"}, + {"name": "United States Minor Outlying Islands", "alpha3Code": "UMI"}, + {"name": "Virgin Islands (British)", "alpha3Code": "VGB"}, + {"name": "Virgin Islands (U.S.)", "alpha3Code": "VIR"}, + {"name": "Brunei Darussalam", "alpha3Code": "BRN"}, + {"name": "Bulgaria", "alpha3Code": "BGR"}, + {"name": "Burkina Faso", "alpha3Code": "BFA"}, + {"name": "Burundi", "alpha3Code": "BDI"}, + {"name": "Cambodia", "alpha3Code": "KHM"}, + {"name": "Cameroon", "alpha3Code": "CMR"}, + {"name": "Canada", "alpha3Code": "CAN"}, + {"name": "Cabo Verde", "alpha3Code": "CPV"}, + {"name": "Cayman Islands", "alpha3Code": "CYM"}, + {"name": "Central African Republic", "alpha3Code": "CAF"}, + {"name": "Chad", "alpha3Code": "TCD"}, + {"name": "Chile", "alpha3Code": "CHL"}, + {"name": "China", "alpha3Code": "CHN"}, + {"name": "Christmas Island", "alpha3Code": "CXR"}, + {"name": "Cocos (Keeling) Islands", "alpha3Code": "CCK"}, + {"name": "Colombia", "alpha3Code": "COL"}, + {"name": "Comoros", "alpha3Code": "COM"}, + {"name": "Congo", "alpha3Code": "COG"}, + {"name": "Congo (Democratic Republic of the)", "alpha3Code": "COD"}, + {"name": "Cook Islands", "alpha3Code": "COK"}, + {"name": "Costa Rica", "alpha3Code": "CRI"}, + {"name": "Croatia", "alpha3Code": "HRV"}, + {"name": "Cuba", "alpha3Code": "CUB"}, + {"name": "Curaçao", "alpha3Code": "CUW"}, + {"name": "Cyprus", "alpha3Code": "CYP"}, + {"name": "Czech Republic", "alpha3Code": "CZE"}, + {"name": "Denmark", "alpha3Code": "DNK"}, + {"name": "Djibouti", "alpha3Code": "DJI"}, + {"name": "Dominica", "alpha3Code": "DMA"}, + {"name": "Dominican Republic", "alpha3Code": "DOM"}, + {"name": "Ecuador", "alpha3Code": "ECU"}, + {"name": "Egypt", "alpha3Code": "EGY"}, + {"name": "El Salvador", "alpha3Code": "SLV"}, + {"name": "Equatorial Guinea", "alpha3Code": "GNQ"}, + {"name": "Eritrea", "alpha3Code": "ERI"}, + {"name": "Estonia", "alpha3Code": "EST"}, + 
{"name": "Ethiopia", "alpha3Code": "ETH"}, + {"name": "Falkland Islands (Malvinas)", "alpha3Code": "FLK"}, + {"name": "Faroe Islands", "alpha3Code": "FRO"}, + {"name": "Fiji", "alpha3Code": "FJI"}, + {"name": "Finland", "alpha3Code": "FIN"}, + {"name": "France", "alpha3Code": "FRA"}, + {"name": "French Guiana", "alpha3Code": "GUF"}, + {"name": "French Polynesia", "alpha3Code": "PYF"}, + {"name": "French Southern Territories", "alpha3Code": "ATF"}, + {"name": "Gabon", "alpha3Code": "GAB"}, + {"name": "Gambia", "alpha3Code": "GMB"}, + {"name": "Georgia", "alpha3Code": "GEO"}, + {"name": "Germany", "alpha3Code": "DEU"}, + {"name": "Ghana", "alpha3Code": "GHA"}, + {"name": "Gibraltar", "alpha3Code": "GIB"}, + {"name": "Greece", "alpha3Code": "GRC"}, + {"name": "Greenland", "alpha3Code": "GRL"}, + {"name": "Grenada", "alpha3Code": "GRD"}, + {"name": "Guadeloupe", "alpha3Code": "GLP"}, + {"name": "Guam", "alpha3Code": "GUM"}, + {"name": "Guatemala", "alpha3Code": "GTM"}, + {"name": "Guernsey", "alpha3Code": "GGY"}, + {"name": "Guinea", "alpha3Code": "GIN"}, + {"name": "Guinea-Bissau", "alpha3Code": "GNB"}, + {"name": "Guyana", "alpha3Code": "GUY"}, + {"name": "Haiti", "alpha3Code": "HTI"}, + {"name": "Heard Island and McDonald Islands", "alpha3Code": "HMD"}, + {"name": "Vatican City", "alpha3Code": "VAT"}, + {"name": "Honduras", "alpha3Code": "HND"}, + {"name": "Hungary", "alpha3Code": "HUN"}, + {"name": "Hong Kong", "alpha3Code": "HKG"}, + {"name": "Iceland", "alpha3Code": "ISL"}, + {"name": "India", "alpha3Code": "IND"}, + {"name": "Indonesia", "alpha3Code": "IDN"}, + {"name": "Ivory Coast", "alpha3Code": "CIV"}, + {"name": "Iran (Islamic Republic of)", "alpha3Code": "IRN"}, + {"name": "Iraq", "alpha3Code": "IRQ"}, + {"name": "Ireland", "alpha3Code": "IRL"}, + {"name": "Isle of Man", "alpha3Code": "IMN"}, + {"name": "Israel", "alpha3Code": "ISR"}, + {"name": "Italy", "alpha3Code": "ITA"}, + {"name": "Jamaica", "alpha3Code": "JAM"}, + {"name": "Japan", "alpha3Code": "JPN"}, + {"name": "Jersey", "alpha3Code": "JEY"}, + {"name": "Jordan", "alpha3Code": "JOR"}, + {"name": "Kazakhstan", "alpha3Code": "KAZ"}, + {"name": "Kenya", "alpha3Code": "KEN"}, + {"name": "Kiribati", "alpha3Code": "KIR"}, + {"name": "Kuwait", "alpha3Code": "KWT"}, + {"name": "Kyrgyzstan", "alpha3Code": "KGZ"}, + {"name": "Lao People's Democratic Republic", "alpha3Code": "LAO"}, + {"name": "Latvia", "alpha3Code": "LVA"}, + {"name": "Lebanon", "alpha3Code": "LBN"}, + {"name": "Lesotho", "alpha3Code": "LSO"}, + {"name": "Liberia", "alpha3Code": "LBR"}, + {"name": "Libya", "alpha3Code": "LBY"}, + {"name": "Liechtenstein", "alpha3Code": "LIE"}, + {"name": "Lithuania", "alpha3Code": "LTU"}, + {"name": "Luxembourg", "alpha3Code": "LUX"}, + {"name": "Macao", "alpha3Code": "MAC"}, + {"name": "North Macedonia", "alpha3Code": "MKD"}, + {"name": "Madagascar", "alpha3Code": "MDG"}, + {"name": "Malawi", "alpha3Code": "MWI"}, + {"name": "Malaysia", "alpha3Code": "MYS"}, + {"name": "Maldives", "alpha3Code": "MDV"}, + {"name": "Mali", "alpha3Code": "MLI"}, + {"name": "Malta", "alpha3Code": "MLT"}, + {"name": "Marshall Islands", "alpha3Code": "MHL"}, + {"name": "Martinique", "alpha3Code": "MTQ"}, + {"name": "Mauritania", "alpha3Code": "MRT"}, + {"name": "Mauritius", "alpha3Code": "MUS"}, + {"name": "Mayotte", "alpha3Code": "MYT"}, + {"name": "Mexico", "alpha3Code": "MEX"}, + {"name": "Micronesia (Federated States of)", "alpha3Code": "FSM"}, + {"name": "Moldova (Republic of)", "alpha3Code": "MDA"}, + {"name": "Monaco", "alpha3Code": "MCO"}, + 
{"name": "Mongolia", "alpha3Code": "MNG"}, + {"name": "Montenegro", "alpha3Code": "MNE"}, + {"name": "Montserrat", "alpha3Code": "MSR"}, + {"name": "Morocco", "alpha3Code": "MAR"}, + {"name": "Mozambique", "alpha3Code": "MOZ"}, + {"name": "Myanmar", "alpha3Code": "MMR"}, + {"name": "Namibia", "alpha3Code": "NAM"}, + {"name": "Nauru", "alpha3Code": "NRU"}, + {"name": "Nepal", "alpha3Code": "NPL"}, + {"name": "Netherlands", "alpha3Code": "NLD"}, + {"name": "New Caledonia", "alpha3Code": "NCL"}, + {"name": "New Zealand", "alpha3Code": "NZL"}, + {"name": "Nicaragua", "alpha3Code": "NIC"}, + {"name": "Niger", "alpha3Code": "NER"}, + {"name": "Nigeria", "alpha3Code": "NGA"}, + {"name": "Niue", "alpha3Code": "NIU"}, + {"name": "Norfolk Island", "alpha3Code": "NFK"}, + {"name": "Korea (Democratic People's Republic of)", "alpha3Code": "PRK"}, + {"name": "Northern Mariana Islands", "alpha3Code": "MNP"}, + {"name": "Norway", "alpha3Code": "NOR"}, + {"name": "Oman", "alpha3Code": "OMN"}, + {"name": "Pakistan", "alpha3Code": "PAK"}, + {"name": "Palau", "alpha3Code": "PLW"}, + {"name": "Palestine, State of", "alpha3Code": "PSE"}, + {"name": "Panama", "alpha3Code": "PAN"}, + {"name": "Papua New Guinea", "alpha3Code": "PNG"}, + {"name": "Paraguay", "alpha3Code": "PRY"}, + {"name": "Peru", "alpha3Code": "PER"}, + {"name": "Philippines", "alpha3Code": "PHL"}, + {"name": "Pitcairn", "alpha3Code": "PCN"}, + {"name": "Poland", "alpha3Code": "POL"}, + {"name": "Portugal", "alpha3Code": "PRT"}, + {"name": "Puerto Rico", "alpha3Code": "PRI"}, + {"name": "Qatar", "alpha3Code": "QAT"}, + {"name": "Republic of Kosovo", "alpha3Code": "UNK"}, + {"name": "Réunion", "alpha3Code": "REU"}, + {"name": "Romania", "alpha3Code": "ROU"}, + {"name": "Russian Federation", "alpha3Code": "RUS"}, + {"name": "Rwanda", "alpha3Code": "RWA"}, + {"name": "Saint Barthélemy", "alpha3Code": "BLM"}, + {"name": "Saint Helena, Ascension and Tristan da Cunha", "alpha3Code": "SHN"}, + {"name": "Saint Kitts and Nevis", "alpha3Code": "KNA"}, + {"name": "Saint Lucia", "alpha3Code": "LCA"}, + {"name": "Saint Martin (French part)", "alpha3Code": "MAF"}, + {"name": "Saint Pierre and Miquelon", "alpha3Code": "SPM"}, + {"name": "Saint Vincent and the Grenadines", "alpha3Code": "VCT"}, + {"name": "Samoa", "alpha3Code": "WSM"}, + {"name": "San Marino", "alpha3Code": "SMR"}, + {"name": "Sao Tome and Principe", "alpha3Code": "STP"}, + {"name": "Saudi Arabia", "alpha3Code": "SAU"}, + {"name": "Senegal", "alpha3Code": "SEN"}, + {"name": "Serbia", "alpha3Code": "SRB"}, + {"name": "Seychelles", "alpha3Code": "SYC"}, + {"name": "Sierra Leone", "alpha3Code": "SLE"}, + {"name": "Singapore", "alpha3Code": "SGP"}, + {"name": "Sint Maarten (Dutch part)", "alpha3Code": "SXM"}, + {"name": "Slovakia", "alpha3Code": "SVK"}, + {"name": "Slovenia", "alpha3Code": "SVN"}, + {"name": "Solomon Islands", "alpha3Code": "SLB"}, + {"name": "Somalia", "alpha3Code": "SOM"}, + {"name": "South Africa", "alpha3Code": "ZAF"}, + {"name": "South Georgia and the South Sandwich Islands", "alpha3Code": "SGS"}, + {"name": "Korea (Republic of)", "alpha3Code": "KOR"}, + {"name": "Spain", "alpha3Code": "ESP"}, + {"name": "Sri Lanka", "alpha3Code": "LKA"}, + {"name": "Sudan", "alpha3Code": "SDN"}, + {"name": "South Sudan", "alpha3Code": "SSD"}, + {"name": "Suriname", "alpha3Code": "SUR"}, + {"name": "Svalbard and Jan Mayen", "alpha3Code": "SJM"}, + {"name": "Swaziland", "alpha3Code": "SWZ"}, + {"name": "Sweden", "alpha3Code": "SWE"}, + {"name": "Switzerland", "alpha3Code": "CHE"}, + {"name": 
"Syrian Arab Republic", "alpha3Code": "SYR"}, + {"name": "Taiwan", "alpha3Code": "TWN"}, + {"name": "Tajikistan", "alpha3Code": "TJK"}, + {"name": "Tanzania, United Republic of", "alpha3Code": "TZA"}, + {"name": "Thailand", "alpha3Code": "THA"}, + {"name": "Timor-Leste", "alpha3Code": "TLS"}, + {"name": "Togo", "alpha3Code": "TGO"}, + {"name": "Tokelau", "alpha3Code": "TKL"}, + {"name": "Tonga", "alpha3Code": "TON"}, + {"name": "Trinidad and Tobago", "alpha3Code": "TTO"}, + {"name": "Tunisia", "alpha3Code": "TUN"}, + {"name": "Turkey", "alpha3Code": "TUR"}, + {"name": "Turkmenistan", "alpha3Code": "TKM"}, + {"name": "Turks and Caicos Islands", "alpha3Code": "TCA"}, + {"name": "Tuvalu", "alpha3Code": "TUV"}, + {"name": "Uganda", "alpha3Code": "UGA"}, + {"name": "Ukraine", "alpha3Code": "UKR"}, + {"name": "United Arab Emirates", "alpha3Code": "ARE"}, + { + "name": "United Kingdom of Great Britain and Northern Ireland", + "alpha3Code": "GBR", + }, + {"name": "United States of America", "alpha3Code": "USA"}, + {"name": "Uruguay", "alpha3Code": "URY"}, + {"name": "Uzbekistan", "alpha3Code": "UZB"}, + {"name": "Vanuatu", "alpha3Code": "VUT"}, + {"name": "Venezuela (Bolivarian Republic of)", "alpha3Code": "VEN"}, + {"name": "Vietnam", "alpha3Code": "VNM"}, + {"name": "Wallis and Futuna", "alpha3Code": "WLF"}, + {"name": "Western Sahara", "alpha3Code": "ESH"}, + {"name": "Yemen", "alpha3Code": "YEM"}, + {"name": "Zambia", "alpha3Code": "ZMB"}, + {"name": "Zimbabwe", "alpha3Code": "ZWE"}, +] diff --git a/src/fideslang/default_taxonomy.py b/src/fideslang/default_taxonomy.py new file mode 100644 index 00000000..efa80fed --- /dev/null +++ b/src/fideslang/default_taxonomy.py @@ -0,0 +1,847 @@ +"""This module contains the the default resources that Fideslang ships with.""" + +from fideslang import ( + Taxonomy, + DataCategory, + DataQualifier, + DataUse, + DataSubject, + Organization, +) + +DEFAULT_TAXONOMY = Taxonomy( + data_category=[ + DataCategory( + fides_key="account", + organization_fides_key="default_organization", + name="Account Data", + description="Data related to a system account.", + parent_key=None, + ), + DataCategory( + fides_key="account.contact", + organization_fides_key="default_organization", + name="Account Contact Data", + description="Contact data related to a system account.", + parent_key="account", + ), + DataCategory( + fides_key="account.contact.city", + organization_fides_key="default_organization", + name="Account City", + description="Account's city level address data.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.contact.country", + organization_fides_key="default_organization", + name="Account Country", + description="Account's country level address data.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.contact.email", + organization_fides_key="default_organization", + name="Account Email", + description="Account's email address.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.contact.phone_number", + organization_fides_key="default_organization", + name="Account Phone Number", + description="Account's phone number.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.contact.postal_code", + organization_fides_key="default_organization", + name="Account Postal Code", + description="Account's postal code.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.contact.state", + organization_fides_key="default_organization", + name="Account 
State", + description="Account's state level address data.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.contact.street", + organization_fides_key="default_organization", + name="Account Street", + description="Account's street level address.", + parent_key="account.contact", + ), + DataCategory( + fides_key="account.payment", + organization_fides_key="default_organization", + name="Payment Data", + description="Payment data related to system account.", + parent_key="account", + ), + DataCategory( + fides_key="account.payment.financial_account_number", + organization_fides_key="default_organization", + name="Account Payment Financial Account Number", + description="Financial account number for an account's payment card, bank account, or other financial system.", + parent_key="account.payment", + ), + DataCategory( + fides_key="system", + organization_fides_key="default_organization", + name="System Data", + description="Data unique to, and under control of the system.", + parent_key=None, + ), + DataCategory( + fides_key="system.authentication", + organization_fides_key="default_organization", + name="Authentication Data", + description="Data used to manage access to the system.", + parent_key="system", + ), + DataCategory( + fides_key="system.operations", + organization_fides_key="default_organization", + name="Operations Data", + description="Data used for system operations.", + parent_key="system", + ), + DataCategory( + fides_key="user", + organization_fides_key="default_organization", + name="User Data", + description="Data related to the user of the system, either provided directly or derived based on their usage.", + parent_key=None, + ), + DataCategory( + fides_key="user.derived", + organization_fides_key="default_organization", + name="Derived Data", + description="Data derived from user provided data or as a result of user actions in the system.", + parent_key="user", + ), + DataCategory( + fides_key="user.derived.identifiable", + organization_fides_key="default_organization", + name="Derived User Identifiable Data", + description="Derived data that is linked to, or identifies a user.", + parent_key="user.derived", + ), + DataCategory( + fides_key="user.derived.identifiable.biometric_health", + organization_fides_key="default_organization", + name="Biometric Health Data", + description="Encoded characteristic collected about a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.browsing_history", + organization_fides_key="default_organization", + name="Browsing History", + description="Content browsing history of a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.demographic", + organization_fides_key="default_organization", + name="Demographic Data", + description="Demographic data about a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.contact", + organization_fides_key="default_organization", + name="Derived Contact Data", + description="Contact data collected about a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.device", + organization_fides_key="default_organization", + name="Device Data", + description="Data related to a user's device, configuration and setting.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.device.cookie_id", + 
organization_fides_key="default_organization", + name="Cookie ID", + description="Cookie unique identification number.", + parent_key="user.derived.identifiable.device", + ), + DataCategory( + fides_key="user.derived.identifiable.device.device_id", + organization_fides_key="default_organization", + name="Device ID", + description="Device unique identification number.", + parent_key="user.derived.identifiable.device", + ), + DataCategory( + fides_key="user.derived.identifiable.device.ip_address", + organization_fides_key="default_organization", + name="IP Address", + description="Unique identifier related to device connection.", + parent_key="user.derived.identifiable.device", + ), + DataCategory( + fides_key="user.derived.identifiable.gender", + organization_fides_key="default_organization", + name="Derived Gender", + description="Gender of an individual.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.location", + organization_fides_key="default_organization", + name="Location Data", + description="Records of the location of a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.media_consumption", + organization_fides_key="default_organization", + name="Media Consumption Data", + description="Media type consumption data of a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.non_specific_age", + organization_fides_key="default_organization", + name="Derived Non-Specific Age", + description="Age range data.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.observed", + organization_fides_key="default_organization", + name="Observed Data", + description="Data collected through observation of use of the system.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.profiling", + organization_fides_key="default_organization", + name="Profiling Data", + description="Preference and interest data about a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.race", + organization_fides_key="default_organization", + name="Derived Race", + description="Racial or ethnic origin data.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.religious_belief", + organization_fides_key="default_organization", + name="Derived Religious Belief", + description="Religion or religious belief.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.search_history", + organization_fides_key="default_organization", + name="Search History", + description="Records of search history and queries of a user.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.sexual_orientation", + organization_fides_key="default_organization", + name="Derived Sexual Orientation", + description="Personal sex life or sexual data.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.social", + organization_fides_key="default_organization", + name="Social Data", + description="Social activity and interaction data.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.telemetry", + organization_fides_key="default_organization", + name="Telemetry Data", + 
description="User identifiable measurement data from system sensors and monitoring.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.unique_id", + organization_fides_key="default_organization", + name="Unique ID", + description="Unique identifier for a user assigned through system use.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.user_sensor", + organization_fides_key="default_organization", + name="User Sensor Data", + description="Measurement data derived about a user's environment through system use.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.organization", + organization_fides_key="default_organization", + name="Organization Identifiable Data", + description="Derived data that is linked to, or identifies an organization.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.identifiable.workplace", + organization_fides_key="default_organization", + name="Derived Workplace", + description="Organization of employment.", + parent_key="user.derived.identifiable", + ), + DataCategory( + fides_key="user.derived.nonidentifiable", + organization_fides_key="default_organization", + name="Derived User Non-Identifiable Data", + description="Non-user identifiable data derived related to a user as a result of user actions in the system.", + parent_key="user.derived", + ), + DataCategory( + fides_key="user.derived.nonidentifiable.sensor", + organization_fides_key="default_organization", + name="Sensor Data", + description="Non-user identifiable measurement data derived from sensors and monitoring systems.", + parent_key="user.derived.nonidentifiable", + ), + DataCategory( + fides_key="user.provided", + organization_fides_key="default_organization", + name="User Provided Data", + description="Data provided or created directly by a user of the system.", + parent_key="user", + ), + DataCategory( + fides_key="user.provided.identifiable", + organization_fides_key="default_organization", + name="User Provided Identifiable Data", + description="Data provided or created directly by a user that is linked to or identifies a user.", + parent_key="user.provided", + ), + DataCategory( + fides_key="user.provided.identifiable.biometric", + organization_fides_key="default_organization", + name="Biometric Data", + description="Encoded characteristics provided by a user.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.childrens", + organization_fides_key="default_organization", + name="Children's Data", + description="Data relating to children.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.contact", + organization_fides_key="default_organization", + name="Provided Contact Data", + description="User provided contact data for purposes other than account management.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.city", + organization_fides_key="default_organization", + name="User Provided City", + description="User's city level address data.", + parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.country", + organization_fides_key="default_organization", + name="User Provided Country", + description="User's country level address data.", + 
parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.email", + organization_fides_key="default_organization", + name="User Provided Email", + description="User's provided email address.", + parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.phone_number", + organization_fides_key="default_organization", + name="User Provided Phone Number", + description="User's phone number.", + parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.postal_code", + organization_fides_key="default_organization", + name="User Provided Postal Code", + description="User's postal code.", + parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.state", + organization_fides_key="default_organization", + name="User Provided State", + description="User's state level address data.", + parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.contact.street", + organization_fides_key="default_organization", + name="User Provided Street", + description="User's street level address data.", + parent_key="user.provided.identifiable.contact", + ), + DataCategory( + fides_key="user.provided.identifiable.credentials", + organization_fides_key="default_organization", + name="Credentials", + description="User provided authentication data.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.credentials.biometric_credentials", + organization_fides_key="default_organization", + name="Biometric Credentials", + description="Credentials for system authentication.", + parent_key="user.provided.identifiable.credentials", + ), + DataCategory( + fides_key="user.provided.identifiable.credentials.password", + organization_fides_key="default_organization", + name="Password", + description="Password for system authentication.", + parent_key="user.provided.identifiable.credentials", + ), + DataCategory( + fides_key="user.provided.identifiable.date_of_birth", + organization_fides_key="default_organization", + name="Date of Birth", + description="User's date of birth.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.financial", + organization_fides_key="default_organization", + name="Financial Data", + description="Payment data and financial history.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.financial.account_number", + organization_fides_key="default_organization", + name="User Provided Financial Account Number", + description="User's account number for a payment card, bank account, or other financial system.", + parent_key="user.provided.identifiable.financial", + ), + DataCategory( + fides_key="user.provided.identifiable.gender", + organization_fides_key="default_organization", + name="User Provided Gender", + description="Gender of an individual.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.genetic", + organization_fides_key="default_organization", + name="Genetic Data", + description="Data about the genetic makeup provided by a user.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.government_id", + organization_fides_key="default_organization", 
+ name="Government ID", + description="State provided identification data.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.government_id.drivers_license_number", + organization_fides_key="default_organization", + name="Driver's License Number", + description="State issued driving identification number.", + parent_key="user.provided.identifiable.government_id", + ), + DataCategory( + fides_key="user.provided.identifiable.government_id.national_identification_number", + organization_fides_key="default_organization", + name="National Identification Number", + description="State issued personal identification number.", + parent_key="user.provided.identifiable.government_id", + ), + DataCategory( + fides_key="user.provided.identifiable.government_id.passport_number", + organization_fides_key="default_organization", + name="Passport Number", + description="State issued passport data.", + parent_key="user.provided.identifiable.government_id", + ), + DataCategory( + fides_key="user.provided.identifiable.health_and_medical", + organization_fides_key="default_organization", + name="Health and Medical Data", + description="Health records or individual's personal medical information.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.job_title", + organization_fides_key="default_organization", + name="Job Title", + description="Professional data.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.name", + organization_fides_key="default_organization", + name="Name", + description="User's real name.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.non_specific_age", + organization_fides_key="default_organization", + name="User Provided Non-Specific Age", + description="Age range data.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.political_opinion", + organization_fides_key="default_organization", + name="Political Opinion", + description="Data related to the individual's political opinions.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.race", + organization_fides_key="default_organization", + name="User Provided Race", + description="Racial or ethnic origin data.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.religious_belief", + organization_fides_key="default_organization", + name="User Provided Religious Belief", + description="Religion or religious belief.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.sexual_orientation", + organization_fides_key="default_organization", + name="User Provided Sexual Orientation", + description="Personal sex life or sexual data.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.identifiable.workplace", + organization_fides_key="default_organization", + name="User Provided Workplace", + description="Organization of employment.", + parent_key="user.provided.identifiable", + ), + DataCategory( + fides_key="user.provided.nonidentifiable", + organization_fides_key="default_organization", + name="User Provided Non-Identifiable Data", + description="Data provided or created directly by a user that is not identifiable.", + parent_key="user.provided", + ), + ], + data_subject=[ 
+ DataSubject( + fides_key="anonymous_user", + organization_fides_key="default_organization", + name="Anonymous User", + description="An individual that is unidentifiable to the systems. Note - This should only be applied to truly anonymous users where there is no risk of re-identification", + ), + DataSubject( + fides_key="citizen_voter", + organization_fides_key="default_organization", + name="Citizen Voter", + description="An individual registered to vote with a state or authority.", + ), + DataSubject( + fides_key="commuter", + organization_fides_key="default_organization", + name="Commuter", + description="An individual that is traveling or transiting in the context of location tracking.", + ), + DataSubject( + fides_key="consultant", + organization_fides_key="default_organization", + name="Consultant", + description="An individual employed in a consultative/temporary capacity by the organization.", + ), + DataSubject( + fides_key="customer", + organization_fides_key="default_organization", + name="Customer", + description="An individual or other organization that purchases goods or services from the organization.", + ), + DataSubject( + fides_key="employee", + organization_fides_key="default_organization", + name="Employee", + description="An individual employed by the organization.", + ), + DataSubject( + fides_key="job_applicant", + organization_fides_key="default_organization", + name="Job Applicant", + description="An individual applying for employment to the organization.", + ), + DataSubject( + fides_key="next_of_kin", + organization_fides_key="default_organization", + name="Next of Kin", + description="A relative of any other individual subject where such a relationship is known.", + ), + DataSubject( + fides_key="passenger", + organization_fides_key="default_organization", + name="Passenger", + description="An individual traveling on some means of provided transport.", + ), + DataSubject( + fides_key="patient", + organization_fides_key="default_organization", + name="Patient", + description="An individual identified for the purposes of any medical care.", + ), + DataSubject( + fides_key="prospect", + organization_fides_key="default_organization", + name="Prospect", + description="An individual or organization to whom an organization is selling goods or services.", + ), + DataSubject( + fides_key="shareholder", + organization_fides_key="default_organization", + name="Shareholder", + description="An individual or organization that holds equity in the organization.", + ), + DataSubject( + fides_key="supplier_vendor", + organization_fides_key="default_organization", + name="Supplier/Vendor", + description="An individual or organization that provides services or goods to the organization.", + ), + DataSubject( + fides_key="trainee", + organization_fides_key="default_organization", + name="Trainee", + description="An individual undergoing training by the organization.", + ), + DataSubject( + fides_key="visitor", + organization_fides_key="default_organization", + name="Visitor", + description="An individual visiting a location.", + ), + ], + data_use=[ + DataUse( + fides_key="provide", + organization_fides_key="default_organization", + name="Provide the capability", + description="Provide, give, or make available the product, service, application or system.", + parent_key=None, + ), + DataUse( + fides_key="provide.system", + organization_fides_key="default_organization", + name="System", + description="The source system, product, service or application being provided to the user.", 
+ parent_key="provide", + ), + DataUse( + fides_key="provide.system.operations", + organization_fides_key="default_organization", + name="System Operations", + description="Use of specified data categories to operate and protect the system in order to provide the service.", + parent_key="provide.system", + ), + DataUse( + fides_key="provide.system.operations.support", + organization_fides_key="default_organization", + name="Operations Support", + description="Use of specified data categories to provide support for operation and protection of the system in order to provide the service.", + parent_key="provide.system.operations", + ), + DataUse( + fides_key="provide.system.operations.support.optimization", + organization_fides_key="default_organization", + name="Support Optimization", + description="Use of specified data categories to optimize and improve support operations in order to provide the service.", + parent_key="provide.system.operations.support", + ), + DataUse( + fides_key="provide.system.upgrades", + organization_fides_key="default_organization", + name="Offer Upgrades", + description="Offer upgrades or upsales such as increased capacity for the service based on monitoring of service usage.", + parent_key="provide.system", + ), + DataUse( + fides_key="improve", + organization_fides_key="default_organization", + name="Improve the capability", + description="Improve the product, service, application or system.", + parent_key=None, + ), + DataUse( + fides_key="improve.system", + organization_fides_key="default_organization", + name="System", + description="The source system, product, service or application being improved.", + parent_key="improve", + ), + DataUse( + fides_key="personalize", + organization_fides_key="default_organization", + name="Personalize the capability", + description="Personalize the product, service, application or system.", + parent_key=None, + ), + DataUse( + fides_key="personalize.system", + organization_fides_key="default_organization", + name="System", + description="The source system, product, service or application being personalized.", + parent_key="personalize", + ), + DataUse( + fides_key="advertising", + organization_fides_key="default_organization", + name="Advertising, Marketing or Promotion", + description="The promotion of products or services targeted to users based on the processing of user provided data in the system.", + parent_key=None, + ), + DataUse( + fides_key="advertising.first_party", + organization_fides_key="default_organization", + name="First Party Advertising", + description="The promotion of products or services targeting users based on processing of derived data from prior use of the system.", + parent_key="advertising", + ), + DataUse( + fides_key="advertising.third_party", + organization_fides_key="default_organization", + name="Third Party Advertising", + description="The promotion of products or services targeting users based on processing of specific categories of data acquired from third party sources.", + parent_key="advertising", + ), + DataUse( + fides_key="advertising.first_party.contextual", + organization_fides_key="default_organization", + name="First Party Contextual Advertising", + description="The promotion of products or services targeted to users based on the processing of derived data from the user's prior use of the services.", + parent_key="advertising.first_party", + ), + DataUse( + fides_key="advertising.first_party.personalized", + organization_fides_key="default_organization", + name="First Party 
Personalized Advertising", + description="The targeting and changing of promotional content based on processing of specific data categories from the user.", + parent_key="advertising.first_party", + ), + DataUse( + fides_key="advertising.third_party.personalized", + organization_fides_key="default_organization", + name="Third Party Personalized Advertising", + description="The targeting and changing of promotional content based on processing of specific categories of user data acquired from third party sources.", + parent_key="advertising.third_party", + ), + DataUse( + fides_key="third_party_sharing", + organization_fides_key="default_organization", + name="Third Party Sharing", + description="The transfer of specified data categories to third parties outside of the system/application's scope.", + parent_key=None, + ), + DataUse( + fides_key="third_party_sharing.payment_processing", + organization_fides_key="default_organization", + name="Sharing for Processing Payments", + description="Sharing of specified data categories with a third party for payment processing.", + parent_key="third_party_sharing", + ), + DataUse( + fides_key="third_party_sharing.personalized_advertising", + organization_fides_key="default_organization", + name="Sharing for Personalized Advertising", + description="Sharing of specified data categories for the purpose of marketing/advertising/promotion.", + parent_key="third_party_sharing", + ), + DataUse( + fides_key="third_party_sharing.fraud_detection", + organization_fides_key="default_organization", + name="Sharing for Fraud Detection", + description="Sharing of specified data categories with a third party for fraud prevention/detection.", + parent_key="third_party_sharing", + ), + DataUse( + fides_key="third_party_sharing.legal_obligation", + organization_fides_key="default_organization", + name="Sharing for Legal Obligation", + description="Sharing of data for legal obligations, including contracts, applicable laws or regulations.", + parent_key="third_party_sharing", + ), + DataUse( + fides_key="collect", + organization_fides_key="default_organization", + name="Collect", + description="Collecting and storing data in order to use it for another purpose such as data training for ML.", + parent_key=None, + ), + DataUse( + fides_key="train_ai_system", + organization_fides_key="default_organization", + name="Train AI System", + description="Training an AI system. 
Please note when this data use is specified, the method and degree to which a user may be directly identified in the resulting AI system should be appended.", + parent_key=None, + ), + ], + data_qualifier=[ + DataQualifier( + fides_key="aggregated", + organization_fides_key="default_organization", + name="Aggregated Data", + description="Statistical data that does not contain individually identifying information but includes information about groups of individuals that renders individual identification impossible.", + parent_key=None, + ), + DataQualifier( + fides_key="aggregated.anonymized", + organization_fides_key="default_organization", + name="Anonymized Data", + description="Data where all attributes have been sufficiently altered that the individual cannot be re-identified by this data or in combination with other datasets.", + parent_key="aggregated", + ), + DataQualifier( + fides_key="aggregated.anonymized.unlinked_pseudonymized", + organization_fides_key="default_organization", + name="Unlinked Pseudonymized Data", + description="Data for which all identifiers have been substituted with unrelated values and linkages broken such that it may not be reversed, even by the party that performed the pseudonymization.", + parent_key="aggregated.anonymized", + ), + DataQualifier( + fides_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", + organization_fides_key="default_organization", + name="Pseudonymized Data", + description="Data for which all identifiers have been substituted with unrelated values, rendering the individual unidentifiable such that it cannot be reasonably reversed other than by the party that performed the pseudonymization.", + parent_key="aggregated.anonymized.unlinked_pseudonymized", + ), + DataQualifier( + fides_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + organization_fides_key="default_organization", + name="Identified Data", + description="Data that directly identifies an individual.", + parent_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", + ), + ], + organization=[Organization(fides_key="default_organization")], +) diff --git a/src/fideslang/manifests.py b/src/fideslang/manifests.py new file mode 100644 index 00000000..f04ea9d8 --- /dev/null +++ b/src/fideslang/manifests.py @@ -0,0 +1,84 @@ +"""This module handles anything related to working with raw manifest files.""" +import glob +from functools import reduce +from typing import Dict, List, Set, Union + +import yaml + + +def write_manifest( + file_name: str, manifest: Union[List, Dict], resource_type: str +) -> None: + """ + Write a dict representation of a resource out to a file. + """ + if isinstance(manifest, dict): + manifest = {resource_type: [manifest]} + else: + manifest = {resource_type: manifest} + + with open(file_name, "w") as manifest_file: + yaml.dump(manifest, manifest_file, sort_keys=False, indent=2) + + +def load_yaml_into_dict(file_path: str) -> Dict: + """ + This loads yaml files into a dictionary to be used in API calls. + """ + with open(file_path, "r") as yaml_file: + loaded = yaml.safe_load(yaml_file) + if isinstance(loaded, dict): + return loaded + + print(f"Failed to parse invalid manifest: {file_path.split('/')[-1]}. Skipping.") + return {} + + +def filter_manifest_by_type( + manifests: Dict[str, List], filter_types: List[str] +) -> Dict[str, List]: + "Filter the resources so that only the specified resource types are returned." 
+ return {key: value for key, value in manifests.items() if key in filter_types} + + +def union_manifests(manifests: List[Dict]) -> Dict[str, List[Dict]]: + """ + Combine all of the manifests into a single dictionary, + appending resource values with the same keys. + """ + + key_lists: List[List[str]] = [list(manifest.keys()) for manifest in manifests] + key_set: Set[str] = set(reduce(lambda x, y: [*x, *y], key_lists)) + + unioned_dict: Dict[str, List] = {} + for manifest in manifests: + for key in key_set: + if key in manifest.keys() and key in unioned_dict.keys(): + unioned_dict[key] += manifest[key] + elif key in manifest.keys(): + unioned_dict[key] = manifest[key] + return unioned_dict + + +def ingest_manifests(manifests_dir: str) -> Dict[str, List[Dict]]: + """ + Ingest either a single file or all of the manifests available in a + directory and concatenate them into a single object. + + Directories will be searched recursively. + """ + yml_endings = ["yml", "yaml"] + if manifests_dir.split(".")[-1] in yml_endings: + manifests = load_yaml_into_dict(manifests_dir) + + else: + manifest_list = [] + for yml_ending in yml_endings: + manifest_list += glob.glob( + f"{manifests_dir}/**/*.{yml_ending}", recursive=True + ) + + manifests = union_manifests( + [load_yaml_into_dict(file) for file in manifest_list] + ) + return manifests diff --git a/src/fideslang/models.py b/src/fideslang/models.py new file mode 100644 index 00000000..8ef34a56 --- /dev/null +++ b/src/fideslang/models.py @@ -0,0 +1,714 @@ +""" +Contains all of the Fides resources modeled as Pydantic models. +""" +from __future__ import annotations + +from enum import Enum +from typing import Dict, List, Optional + +from pydantic import root_validator, validator, BaseModel, Field, AnyUrl, HttpUrl + +from fideslang.validation import ( + FidesKey, + sort_list_objects_by_name, + no_self_reference, + matching_parent_key, + check_valid_country_code, +) + +# Reusable components +country_code_validator = validator("third_country_transfers", allow_reuse=True)( + check_valid_country_code +) + +matching_parent_key_validator = validator("parent_key", allow_reuse=True, always=True)( + matching_parent_key +) +no_self_reference_validator = validator("parent_key", allow_reuse=True)( + no_self_reference +) + +name_field = Field(description="Human-Readable name for this resource.") +description_field = Field( + description="A detailed description of what this resource is." +) + + +# Fides Base Model +class FidesModel(BaseModel): + """The base model for all Fides Resources.""" + + fides_key: FidesKey = Field( + description="A unique key used to identify this resource." + ) + organization_fides_key: FidesKey = Field( + default="default_organization", + description="Defines the Organization that this resource belongs to.", + ) + name: Optional[str] = name_field + description: Optional[str] = description_field + + class Config: + "Config for the FidesModel" + extra = "ignore" + orm_mode = True + + +class DataResponsibilityTitle(str, Enum): + """ + The model defining the responsibility or role over + the system that processes personal data. + + Used to identify whether the organization is a + Controller, Processor, or Sub-Processor of the data + """ + + CONTROLLER = "Controller" + PROCESSOR = "Processor" + SUB_PROCESSOR = "Sub-Processor" + + +class IncludeExcludeEnum(str, Enum): + """ + Determine whether or not defined rights are + being included or excluded. 
+ """ + + ALL = "ALL" + EXCLUDE = "EXCLUDE" + INCLUDE = "INCLUDE" + NONE = "NONE" + + +class DataSubjectRightsEnum(str, Enum): + """ + The model for data subject rights over + personal data. + + Based upon chapter 3 of the GDPR + """ + + INFORMED = "Informed" + ACCESS = "Access" + RECTIFICATION = "Rectification" + ERASURE = "Erasure" + PORTABILITY = "Portability" + RESTRICT_PROCESSING = "Restrict Processing" + WITHDRAW_CONSENT = "Withdraw Consent" + OBJECT = "Object" + OBJECT_TO_AUTOMATED_PROCESSING = "Object to Automated Processing" + + +class LegalBasisEnum(str, Enum): + """ + The model for allowable legal basis categories + + Based upon article 6 of the GDPR + """ + + CONSENT = "Consent" + CONTRACT = "Contract" + LEGAL_OBLIGATION = "Legal Obligation" + VITAL_INTEREST = "Vital Interest" + PUBLIC_INTEREST = "Public Interest" + LEGITIMATE_INTEREST = "Legitimate Interests" + + +class SpecialCategoriesEnum(str, Enum): + """ + The model for processing special categories + of personal data. + + Based upon article 9 of the GDPR + """ + + CONSENT = "Consent" + EMPLOYMENT = "Employment" + VITAL_INTEREST = "Vital Interests" + NON_PROFIT_BODIES = "Non-profit Bodies" + PUBLIC_BY_DATA_SUBJECT = "Public by Data Subject" + LEGAL_CLAIMS = "Legal Claims" + PUBLIC_INTEREST = "Substantial Public Interest" + MEDICAL = "Medical" + PUBLIC_HEALTH_INTEREST = "Public Health Interest" + + +# Privacy Data Types +class DataCategory(FidesModel): + """The DataCategory resource model.""" + + parent_key: Optional[FidesKey] + + _matching_parent_key: classmethod = matching_parent_key_validator + _no_self_reference: classmethod = no_self_reference_validator + + +class DataQualifier(FidesModel): + """The DataQualifier resource model.""" + + parent_key: Optional[FidesKey] + + _matching_parent_key: classmethod = matching_parent_key_validator + _no_self_reference: classmethod = no_self_reference_validator + + +class DataSubjectRights(BaseModel): + """ + The DataSubjectRights resource model. + + Includes a strategy and optionally a + list of data subject rights to apply + via the set strategy. + """ + + strategy: IncludeExcludeEnum = Field( + description="Defines the strategy used when mapping data rights to a data subject.", + ) + values: Optional[List[DataSubjectRightsEnum]] = Field( + description="A list of valid data subject rights to be used when applying data rights to a data subject via a strategy.", + ) + + @root_validator() + @classmethod + def include_exclude_has_values(cls, values: Dict) -> Dict: + """ + Validate that if include or exclude is chosen, at least one + value is present. + """ + strategy, rights = values.get("strategy"), values.get("values") + if strategy in ("INCLUDE", "EXCLUDE"): + assert ( + rights is not None + ), f"If {strategy} is chosen, rights must also be listed." + return values + + +class DataSubject(FidesModel): + """The DataSubject resource model.""" + + rights: Optional[DataSubjectRights] = Field(description=DataSubjectRights.__doc__) + automated_decisions_or_profiling: Optional[bool] = Field( + description="A boolean value to annotate whether or not automated decisions/profiling exists for the data subject.", + ) + + +class DataUse(FidesModel): + """The DataUse resource model.""" + + parent_key: Optional[FidesKey] + legal_basis: Optional[LegalBasisEnum] = Field( + description="The legal basis category under which the data use falls. 
This field is used as part of the creation of an exportable data map.", + ) + special_category: Optional[SpecialCategoriesEnum] = Field( + description="The special category of processing under which the data use falls. This field is used as part of the creation of an exportable data map.", + ) + recipients: Optional[List[str]] = Field( + description="An array of recipients when sharing personal data outside of your organization.", + ) + legitimate_interest: bool = Field( + default=False, + description="A boolean representation of whether the legal basis used is `Legitimate Interest`. Validated at run time and looks for a `legitimate_interest_impact_assessment` to exist if true.", + ) + legitimate_interest_impact_assessment: Optional[AnyUrl] = Field( + description="A URL pointing to the legitimate interest impact assessment. Required if the legal basis used is legitimate interest.", + ) + + _matching_parent_key: classmethod = matching_parent_key_validator + _no_self_reference: classmethod = no_self_reference_validator + + @validator("legitimate_interest", always=True) + @classmethod + def set_legitimate_interest(cls, value: bool, values: Dict) -> bool: + """Sets if a legitimate interest is used.""" + if values["legal_basis"] == "Legitimate Interests": + value = True + return value + + @validator("legitimate_interest_impact_assessment", always=True) + @classmethod + def ensure_impact_assessment(cls, value: AnyUrl, values: Dict) -> AnyUrl: + """ + Validates an impact assessment is applied if a + legitimate interest has been defined. + """ + if values["legitimate_interest"]: + assert ( + value is not None + ), "Impact assessment cannot be null for a legitimate interest, please provide a valid url" + return value + + +# Dataset
class DatasetField(BaseModel): + """ + The DatasetField resource model. + + This resource is nested within a DatasetCollection. + """ + + name: str = name_field + description: Optional[str] = description_field + data_categories: Optional[List[FidesKey]] = Field( + description="Arrays of Data Categories, identified by `fides_key`, that apply to this field.", + ) + data_qualifier: FidesKey = Field( + default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + description="A Data Qualifier that applies to this field. Note that this field holds a single value, therefore, the property name is singular.", + ) + retention: Optional[str] = Field( + description="An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", + ) + fields: Optional[List[DatasetField]] = Field( + description="An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases).", + ) + + +class DatasetCollection(BaseModel): + """ + The DatasetCollection resource model. + + This resource is nested within a Dataset. 
+ """ + + name: str = name_field + description: Optional[str] = description_field + data_categories: Optional[List[FidesKey]] = Field( + description="Array of Data Category resources identified by `fides_key`, that apply to all fields in the collection.", + ) + data_qualifier: FidesKey = Field( + default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + description="Array of Data Qualifier resources identified by `fides_key`, that apply to all fields in the collection.", + ) + retention: Optional[str] = Field( + description="An optional string to describe the retention policy for a Dataset collection. This field can also be applied more granularly at the field level of a Dataset.", + ) + fields: List[DatasetField] = Field( + description="An array of objects that describe the collection's fields.", + ) + + _sort_fields: classmethod = validator("fields", allow_reuse=True)( + sort_list_objects_by_name + ) + + +class ContactDetails(BaseModel): + """ + The contact details information model. + + Used to capture contact information for controllers, used + as part of exporting a data map / ROPA. + + This model is nested under an Organization and + potentially under a system/dataset. + """ + + name: str = Field( + default="", + description="An individual name used as part of publishing contact information. Encrypted at rest on the server.", + ) + address: str = Field( + default="", + description="An individual address used as part of publishing contact information. Encrypted at rest on the server.", + ) + email: str = Field( + default="", + description="An individual email used as part of publishing contact information. Encrypted at rest on the server.", + ) + phone: str = Field( + default="", + description="An individual phone number used as part of publishing contact information. Encrypted at rest on the server.", + ) + + +class DatasetMetadata(BaseModel): + """ + The DatasetMetadata resource model. + + Object used to hold application specific metadata for a dataset + """ + + resource_id: Optional[str] + + +class Dataset(FidesModel): + "The Dataset resource model." + + meta: Optional[Dict[str, str]] = Field( + description="An optional object that provides additional information about the Dataset. You can structure the object however you like. It can be a simple set of `key: value` properties or a deeply nested hierarchy of objects. How you use the object is up to you: Fides ignores it." + ) + data_categories: Optional[List[FidesKey]] = Field( + description="Array of Data Category resources identified by `fides_key`, that apply to all collections in the Dataset.", + ) + data_qualifier: FidesKey = Field( + default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + description="Array of Data Qualifier resources identified by `fides_key`, that apply to all collections in the Dataset.", + ) + fidesctl_meta: Optional[DatasetMetadata] = Field( + description=DatasetMetadata.__doc__, + ) + joint_controller: Optional[ContactDetails] = Field( + description=ContactDetails.__doc__, + ) + retention: Optional[str] = Field( + default="No retention or erasure policy", + description="An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", + ) + third_country_transfers: Optional[List[str]] = Field( + description="An optional array to identify any third countries where data is transited to. 
For consistency purposes, these fields are required to follow the Alpha-3 code set in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).", + ) + collections: List[DatasetCollection] = Field( + description="An array of objects that describe the Dataset's collections.", + ) + _sort_collections: classmethod = validator("collections", allow_reuse=True)( + sort_list_objects_by_name + ) + _check_valid_country_code: classmethod = country_code_validator + + +# Evaluation +class ViolationAttributes(BaseModel): + "The model for attributes which led to an evaluation violation" + + data_categories: List[str] = Field( + description="A list of data categories which led to an evaluation violation.", + ) + data_subjects: List[str] = Field( + description="A list of data subjects which led to an evaluation violation.", + ) + data_uses: List[str] = Field( + description="A list of data uses which led to an evaluation violation.", + ) + data_qualifier: str = Field( + description="The data qualifier which led to an evaluation violation.", + ) + + +class Violation(BaseModel): + "The model for violations within an evaluation." + + violating_attributes: ViolationAttributes = Field( + description=ViolationAttributes.__doc__ + ) + detail: str = Field( + description="A human-readable string detailing the evaluation violation.", + ) + + +class StatusEnum(str, Enum): + "The model for possible evaluation results." + + FAIL = "FAIL" + PASS = "PASS" + + +class Evaluation(BaseModel): + """ + The Evaluation resource model. + + This resource is created after an evaluation is executed. + """ + + fides_key: FidesKey = Field( + description="A uuid generated for each unique evaluation.", + ) + status: StatusEnum = Field(description=StatusEnum.__doc__) + violations: List[Violation] = Field( + default=[], + description=Violation.__doc__, + ) + message: str = Field( + default="", + description="A human-readable string response for the evaluation.", + ) + + class Config: + "Config for the Evaluation" + extra = "ignore" + orm_mode = True + + +# Organization +class ResourceFilter(BaseModel): + """ + The ResourceFilter resource model. + """ + + type: str = Field( + description="The type of filter to be used (i.e. ignore_resource_arn)", + ) + value: str = Field( + description="A string representation of resources to be filtered. Can include wildcards.", + ) + + +class OrganizationMetadata(BaseModel): + """ + The OrganizationMetadata resource model. + + Object used to hold application specific metadata for an organization + """ + + resource_filters: Optional[List[ResourceFilter]] = Field( + description="A list of filters that can be used when generating or scanning systems." + ) + + +class Organization(FidesModel): + """ + The Organization resource model. + + This resource is used as a way to organize all other resources. 
+ """ + + # It inherits this from FidesModel but Organizations don't have this field + organization_parent_key: None = Field( + default=None, + description="An inherited field from the FidesModel that is unused with an Organization.", + ) + controller: Optional[ContactDetails] = Field( + description=ContactDetails.__doc__, + ) + data_protection_officer: Optional[ContactDetails] = Field( + description=ContactDetails.__doc__, + ) + fidesctl_meta: Optional[OrganizationMetadata] = Field( + description=OrganizationMetadata.__doc__, + ) + representative: Optional[ContactDetails] = Field( + description=ContactDetails.__doc__, + ) + security_policy: Optional[HttpUrl] = Field( + description="An optional URL to the organization's security policy." + ) + + +# Policy
class MatchesEnum(str, Enum): + """ + The MatchesEnum resource model. + + Determines how the listed resources are matched in the evaluation logic. + """ + + ANY = "ANY" + ALL = "ALL" + NONE = "NONE" + OTHER = "OTHER" + + +class PrivacyRule(BaseModel): + """ + The PrivacyRule resource model. + + A list of privacy data types and what match method to use. + """ + + matches: MatchesEnum = Field( + description=MatchesEnum.__doc__, + ) + values: List[FidesKey] = Field( + description="A list of fides keys to be used with the matching type in a privacy rule.", + ) + + +class PolicyRule(BaseModel): + """ + The PolicyRule resource model. + + Describes the allowed combination of the various privacy data types. + """ + + name: str + data_categories: PrivacyRule = Field( + description=PrivacyRule.__doc__, + ) + data_uses: PrivacyRule = Field( + description=PrivacyRule.__doc__, + ) + data_subjects: PrivacyRule = Field( + description=PrivacyRule.__doc__, + ) + data_qualifier: FidesKey = Field( + default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + description="The fides key of the data qualifier to be used in a privacy rule.", + ) + + +class Policy(FidesModel): + """ + The Policy resource model. + + An object used to organize a list of PolicyRules. + """ + + rules: List[PolicyRule] = Field( + description=PolicyRule.__doc__, + ) + + _sort_rules: classmethod = validator("rules", allow_reuse=True)( + sort_list_objects_by_name + ) + + +# Registry
class Registry(FidesModel): + """ + The Registry resource model. + + Systems can be assigned to this resource, but it doesn't inherently + point to any other resources. + """ + + +# System
class DataProtectionImpactAssessment(BaseModel): + """ + The DataProtectionImpactAssessment (DPIA) resource model. + + Contains information in regard to the data protection + impact assessment exported on a data map or Record of + Processing Activities (RoPA). + + A legal requirement under GDPR for any project that + introduces a high risk to personal information. + """ + + is_required: bool = Field( + default=False, + description="A boolean value determining if a data protection impact assessment is required. Defaults to False.", + ) + progress: Optional[str] = Field( + description="The optional status of a Data Protection Impact Assessment. Returned on an exported data map or RoPA.", + ) + link: Optional[AnyUrl] = Field( + description="The optional link to the Data Protection Impact Assessment. Returned on an exported data map or RoPA.", + ) + + +class PrivacyDeclaration(BaseModel): + """ + The PrivacyDeclaration resource model. + + States a function of a system, and describes how it relates + to the privacy data types. 
+ """ + + name: str = Field( + description="The name of the privacy declaration on the system.", + ) + data_categories: List[FidesKey] = Field( + description="An array of data categories describing a system in a privacy declaration.", + ) + data_use: FidesKey = Field( + description="The Data Use describing a system in a privacy declaration.", + ) + data_qualifier: FidesKey = Field( + default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + description="The fides key of the data qualifier describing a system in a privacy declaration.", + ) + data_subjects: List[FidesKey] = Field( + description="An array of data subjects describing a system in a privacy declaration.", + ) + dataset_references: Optional[List[FidesKey]] = Field( + description="Referenced Dataset fides keys used by the system.", + ) + + +class SystemMetadata(BaseModel): + """ + The SystemMetadata resource model. + + Object used to hold application specific metadata for a system + """ + + resource_id: Optional[str] = Field( + description="The external resource id for the system being modeled." + ) + endpoint_address: Optional[str] = Field( + description="The host of the external resource for the system being modeled." + ) + endpoint_port: Optional[str] = Field( + description="The port of the external resource for the system being modeled." + ) + + +class System(FidesModel): + """ + The System resource model. + + Describes an application and includes a list of PrivacyDeclaration resources. + """ + + registry_id: Optional[int] = Field( + description="The id of the system registry, if used.", + ) + meta: Optional[Dict[str, str]] = Field( + description="An optional property to store any extra information for a system. Not used by fidesctl.", + ) + fidesctl_meta: Optional[SystemMetadata] = Field( + description=SystemMetadata.__doc__, + ) + system_type: str = Field( + description="A required value to describe the type of system being modeled, examples include: Service, Application, Third Party, etc.", + ) + data_responsibility_title: DataResponsibilityTitle = Field( + default=DataResponsibilityTitle.CONTROLLER, + description=DataResponsibilityTitle.__doc__, + ) + privacy_declarations: List[PrivacyDeclaration] = Field( + description=PrivacyDeclaration.__doc__, + ) + system_dependencies: Optional[List[FidesKey]] = Field( + description="A list of fides keys to model dependencies." + ) + joint_controller: Optional[ContactDetails] = Field( + description=ContactDetails.__doc__, + ) + third_country_transfers: Optional[List[str]] = Field( + description="An optional array to identify any third countries where data is transited to. 
For consistency purposes, these fields are required to follow the Alpha-3 code set in ISO 3166-1.", + ) + administrating_department: Optional[str] = Field( + default="Not defined", + description="An optional value to identify the owning department or group of the system within your organization.", + ) + data_protection_impact_assessment: DataProtectionImpactAssessment = Field( + default=DataProtectionImpactAssessment(), + description=DataProtectionImpactAssessment.__doc__, + ) + + _sort_privacy_declarations: classmethod = validator( + "privacy_declarations", allow_reuse=True + )(sort_list_objects_by_name) + + _no_self_reference: classmethod = validator( + "system_dependencies", allow_reuse=True, each_item=True + )(no_self_reference) + + _check_valid_country_code: classmethod = country_code_validator + + class Config: + "Class for the System config" + use_enum_values = True + + +# Taxonomy
class Taxonomy(BaseModel): + """ + Represents an entire taxonomy of Fides Resources. + + The choice to not use pluralized forms of each resource name + was deliberate, as this would have caused huge amounts of complexity + elsewhere across the codebase. + """ + + data_category: List[DataCategory] = Field(default_factory=list) + data_subject: Optional[List[DataSubject]] = Field(default_factory=list) + data_use: Optional[List[DataUse]] = Field(default_factory=list) + data_qualifier: Optional[List[DataQualifier]] = Field(default_factory=list) + + dataset: Optional[List[Dataset]] = Field(default_factory=list) + system: Optional[List[System]] = Field(default_factory=list) + policy: Optional[List[Policy]] = Field(default_factory=list) + + registry: Optional[List[Registry]] = Field(default_factory=list) + organization: List[Organization] = Field(default_factory=list) diff --git a/src/fideslang/parse.py b/src/fideslang/parse.py new file mode 100644 index 00000000..16ebcf96 --- /dev/null +++ b/src/fideslang/parse.py @@ -0,0 +1,45 @@ +""" +This module handles everything related to parsing resources into Pydantic models, +either from local files or the server. +""" +from typing import List, Dict + +from fideslang import model_map, FidesModel, Taxonomy + + +def parse_dict( + resource_type: str, resource: Dict, from_server: bool = False +) -> FidesModel: + """ + Parse an individual resource into its Python model. + """ + resource_source = "server" if from_server else "manifest file" + if resource_type not in list(model_map.keys()): + print(f"This resource type does not exist: {resource_type}") + raise SystemExit(1) + + try: + parsed_manifest = model_map[resource_type].parse_obj(resource) + except Exception as err: + print( + "Failed to parse {} from {}:\n{}".format( + resource_type, resource_source, resource + ) + ) + raise SystemExit(err) + return parsed_manifest + + +def load_manifests_into_taxonomy(raw_manifests: Dict[str, List[Dict]]) -> Taxonomy: + """ + Parse the raw resource manifests into Pydantic resources. + """ + taxonomy = Taxonomy.parse_obj( + { + resource_type: [ + parse_dict(resource_type, resource) for resource in resource_list + ] + for resource_type, resource_list in raw_manifests.items() + } + ) + return taxonomy diff --git a/src/fideslang/relationships.py b/src/fideslang/relationships.py new file mode 100644 index 00000000..b67780a7 --- /dev/null +++ b/src/fideslang/relationships.py @@ -0,0 +1,81 @@ +""" +This module is responsible for calculating what resources are referenced +by each other and building a dependency graph of relationships. 
+""" + + import inspect + from functools import reduce + from typing import List, Set + + + from fideslang.models import ( + FidesKey, + Taxonomy, + BaseModel, + ) + from fideslang.utils import get_resource_by_fides_key + + + def find_nested_keys_in_list(parameter_value: List[BaseModel]) -> List[str]: + """ + Iterates a nested object list and returns any nested fides keys + """ + nested_keys = [ + nested_key + for param_element in parameter_value + for nested_key in find_referenced_fides_keys(param_element) + ] + return nested_keys + + + def find_referenced_fides_keys(resource: object) -> Set[FidesKey]: + """ + Use type-signature introspection to figure out which fields + include the FidesKey type and return all of those values. + + Note that this finds _all_ fides_keys, including the resource's own fides_key + """ + referenced_fides_keys: Set[FidesKey] = set() + signature = inspect.signature(type(resource), follow_wrapped=True) + parameter_values = filter( + lambda parameter: hasattr(resource, parameter.name), + signature.parameters.values(), + ) + for parameter in parameter_values: + parameter_value = resource.__getattribute__(parameter.name) + if parameter_value: + if parameter.annotation == FidesKey: + referenced_fides_keys.add(parameter_value) + elif parameter.annotation == List[FidesKey]: + referenced_fides_keys.update(resource.__getattribute__(parameter.name)) + elif ( + isinstance(parameter_value, list) and parameter.annotation != List[str] + ): + nested_keys = find_nested_keys_in_list(parameter_value) + referenced_fides_keys.update(nested_keys) + elif hasattr(parameter_value, "__dict__"): + referenced_fides_keys.update( + find_referenced_fides_keys(parameter_value) + ) + return referenced_fides_keys + + + def get_referenced_missing_keys(taxonomy: Taxonomy) -> List[FidesKey]: + """ + Iterate through the Taxonomy and create a set of all of the FidesKeys + referenced within it, returning any keys that are missing from the Taxonomy. + """ + referenced_keys: List[Set[FidesKey]] = [ + find_referenced_fides_keys(resource) + for resource_type in taxonomy.__fields_set__ + for resource in getattr(taxonomy, resource_type) + ] + key_set: Set[FidesKey] = set( + reduce(lambda x, y: set().union(x).union(y), referenced_keys) + ) + keys_not_in_taxonomy = [ + fides_key + for fides_key in key_set + if get_resource_by_fides_key(taxonomy, fides_key) is None + ] + return keys_not_in_taxonomy diff --git a/src/fideslang/utils.py b/src/fideslang/utils.py new file mode 100644 index 00000000..5b64dbcb --- /dev/null +++ b/src/fideslang/utils.py @@ -0,0 +1,22 @@ +""" +Utils for use within various fideslang modules. +""" + +from typing import Dict, Optional + +from fideslang import FidesModel, Taxonomy + + +def get_resource_by_fides_key( + taxonomy: Taxonomy, fides_key: str +) -> Optional[Dict[str, FidesModel]]: + """ + Recurse through a taxonomy to find a specific resource by its fides_key. + """ + + return { + resource_type: resource + for resource_type in taxonomy.__fields_set__ + for resource in getattr(taxonomy, resource_type) + if resource.fides_key == fides_key + } or None diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py new file mode 100644 index 00000000..c6b8a7e1 --- /dev/null +++ b/src/fideslang/validation.py @@ -0,0 +1,95 @@ +""" +Contains all of the additional validation for the resource models. 
+""" + +import re +from typing import List, Dict, Pattern + +from pydantic import ConstrainedStr + +from fideslang.default_fixtures import COUNTRY_CODES + + +VALID_COUNTRY_CODES = [country["alpha3Code"] for country in COUNTRY_CODES] + + +class FidesValidationError(Exception): + """Custom exception for when the pydantic ValidationError can't be used.""" + + +class FidesKey(ConstrainedStr): + """ + A FidesKey type that creates a custom constrained string. + """ + + regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.-]+$") + + @classmethod # This overrides the default method to throw the custom FidesValidationError + def validate(cls, value: str) -> str: + if not cls.regex.match(value): + raise FidesValidationError( + "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." + ) + + return value + + +def sort_list_objects_by_name(values: List) -> List: + """ + Sort objects in a list by their name. + This makes resource comparisons deterministic. + """ + values.sort(key=lambda value: value.name) + return values + + +def no_self_reference(value: FidesKey, values: Dict) -> FidesKey: + """ + Check to make sure that the fides_key doesn't match other fides_key + references within an object. + + i.e. DataCategory.parent_key != DataCategory.fides_key + """ + + fides_key = FidesKey.validate(values.get("fides_key", "")) + if value == fides_key: + raise FidesValidationError("FidesKey can not self-reference!") + return value + + +def matching_parent_key(value: FidesKey, values: Dict) -> FidesKey: + """ + Confirm that the parent_key matches the parent parsed from the FidesKey. + """ + + fides_key = FidesKey.validate(values.get("fides_key", "")) + split_fides_key = fides_key.split(".") + + # Check if it is a top-level resource + if len(split_fides_key) == 1 and not value: + return value + + # Reform the parent_key from the fides_key and compare + parent_key_from_fides_key = ".".join(split_fides_key[:-1]) + if parent_key_from_fides_key != value: + raise FidesValidationError( + "The parent_key ({0}) does not match the parent parsed ({1}) from the fides_key ({2})!".format( + value, parent_key_from_fides_key, fides_key + ) + ) + return value + + +def check_valid_country_code(country_code_list: List) -> List: + """ + Validate all listed countries (if present) are valid country codes. + """ + if country_code_list is not None: + for country_code in country_code_list: + if country_code not in VALID_COUNTRY_CODES: + raise FidesValidationError( + "The country identified as {} is not a valid Alpha-3 code per ISO 3166.".format( + country_code + ) + ) + return country_code_list diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..4041da18 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,200 @@ +"""Common fixtures to be used across tests.""" +from typing import Any, Dict + +import os +import pytest +import yaml + +from fideslang import models + + +@pytest.fixture(scope="session") +def resources_dict(): + """ + Yields a resource containing sample representations of different + Fides resources. 
+ """ + resources_dict: Dict[str, Any] = { + "data_category": models.DataCategory( + organization_fides_key=1, + fides_key="user.provided.identifiable.custom", + parent_key="user.provided.identifiable", + name="Custom Data Category", + description="Custom Data Category", + ), + "data_qualifier": models.DataQualifier( + organization_fides_key=1, + fides_key="custom_data_qualifier", + name="Custom Data Qualifier", + description="Custom Data Qualifier", + ), + "dataset": models.Dataset( + organization_fides_key=1, + fides_key="test_sample_db_dataset", + name="Sample DB Dataset", + description="This is a Sample Database Dataset", + collections=[ + models.DatasetCollection( + name="user", + fields=[ + models.DatasetField( + name="Food_Preference", + description="User's favorite food", + path="some.path", + ), + models.DatasetField( + name="First_Name", + description="A First Name Field", + path="another.path", + data_categories=["user.provided.identifiable.name"], + data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + ), + models.DatasetField( + name="Email", + description="User's Email", + path="another.another.path", + data_categories=[ + "user.provided.identifiable.contact.email" + ], + data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", + ), + ], + ) + ], + ), + "data_subject": models.DataSubject( + organization_fides_key=1, + fides_key="custom_subject", + name="Custom Data Subject", + description="Custom Data Subject", + ), + "data_use": models.DataUse( + organization_fides_key=1, + fides_key="custom_data_use", + name="Custom Data Use", + description="Custom Data Use", + ), + "evaluation": models.Evaluation( + fides_key="test_evaluation", status="PASS", details=["foo"], message="bar" + ), + "organization": models.Organization( + fides_key="test_organization", + name="Test Organization", + description="Test Organization", + ), + "policy": models.Policy( + organization_fides_key=1, + fides_key="test_policy", + name="Test Policy", + version="1.3", + description="Test Policy", + rules=[], + ), + "policy_rule": models.PolicyRule( + name="Test Policy", + data_categories=models.PrivacyRule(matches="NONE", values=[]), + data_uses=models.PrivacyRule(matches="NONE", values=["provide.system"]), + data_subjects=models.PrivacyRule(matches="ANY", values=[]), + data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", + ), + "registry": models.Registry( + organization_fides_key=1, + fides_key="test_registry", + name="Test Registry", + description="Test Registry", + systems=[], + ), + "system": models.System( + organization_fides_key=1, + registryId=1, + fides_key="test_system", + system_type="SYSTEM", + name="Test System", + description="Test Policy", + privacy_declarations=[ + models.PrivacyDeclaration( + name="declaration-name", + data_categories=[], + data_use="provide", + data_subjects=[], + data_qualifier="aggregated_data", + dataset_references=[], + ) + ], + system_dependencies=[], + ), + } + yield resources_dict + + +@pytest.fixture() +def test_manifests(): + test_manifests = { + "manifest_1": { + "dataset": [ + { + "name": "Test Dataset 1", + "organization_fides_key": 1, + "datasetType": {}, + "datasetLocation": "somedb:3306", + "description": "Test Dataset 1", + "fides_key": "some_dataset", + "datasetTables": [], + } + ], + "system": [ + { + "name": "Test System 1", + "organization_fides_key": 1, + "systemType": "mysql", + "description": "Test System 1", + "fides_key": "some_system", + } + ], + }, 
"manifest_2": { + "dataset": [ + { + "name": "Test Dataset 2", + "description": "Test Dataset 2", + "organization_fides_key": 1, + "datasetType": {}, + "datasetLocation": "somedb:3306", + "fides_key": "another_dataset", + "datasetTables": [], + } + ], + "system": [ + { + "name": "Test System 2", + "organization_fides_key": 1, + "systemType": "mysql", + "description": "Test System 2", + "fides_key": "another_system", + } + ], + }, + } + yield test_manifests + + +@pytest.fixture() +def populated_manifest_dir(test_manifests, tmp_path): + manifest_dir = f"{tmp_path}/populated_manifest" + os.mkdir(manifest_dir) + for manifest in test_manifests.keys(): + with open(f"{manifest_dir}/{manifest}.yml", "w") as manifest_file: + yaml.dump(test_manifests[manifest], manifest_file) + return manifest_dir + + +@pytest.fixture() +def populated_nested_manifest_dir(test_manifests, tmp_path): + manifest_dir = f"{tmp_path}/populated_nested_manifest" + os.mkdir(manifest_dir) + for manifest in test_manifests.keys(): + nested_manifest_dir = f"{manifest_dir}/{manifest}" + os.mkdir(nested_manifest_dir) + with open(f"{nested_manifest_dir}/{manifest}.yml", "w") as manifest_file: + yaml.dump(test_manifests[manifest], manifest_file) + return manifest_dir diff --git a/tests/data/failing_dataset_collection_taxonomy.yml b/tests/data/failing_dataset_collection_taxonomy.yml new file mode 100644 index 00000000..6d176d4f --- /dev/null +++ b/tests/data/failing_dataset_collection_taxonomy.yml @@ -0,0 +1,54 @@ +dataset: + - fides_key: test_db_dataset_failing_dataset + name: Sample DB Dataset + description: This is a Sample Database Dataset + collections: + - name: users + description: User's information + data_categories: + - user.provided.identifiable.political_opinion + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized + fields: + - name: First_Name + description: A First Name Field + data_categories: + - user.provided.identifiable.name + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + +system: + - fides_key: customer_data_sharing_system + name: Customer Data Sharing System + description: Share data about our users with third-parties for advertising + system_type: Service + privacy_declarations: + - name: Share Political Opinions + data_categories: + - user.provided.identifiable + data_use: advertising + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + data_subjects: + - customer + dataset_references: + - test_db_dataset_failing_dataset + system_dependencies: [] + +policy: + - fides_key: primary_privacy_policy + name: Primary Privacy Policy + description: The main privacy policy for the organization. 
+ rules: + - fides_key: reject_political_opinion + description: Disallow advertising of customer political opinion data + data_categories: + matches: ANY + values: + - user.provided.identifiable.political_opinion + data_uses: + matches: ANY + values: + - advertising + data_subjects: + matches: ANY + values: + - customer + data_qualifier: aggregated diff --git a/tests/data/failing_dataset_field_taxonomy.yml b/tests/data/failing_dataset_field_taxonomy.yml new file mode 100644 index 00000000..d91d585e --- /dev/null +++ b/tests/data/failing_dataset_field_taxonomy.yml @@ -0,0 +1,55 @@ +dataset: + - fides_key: test_db_dataset_failing_dataset + name: Sample DB Dataset + description: This is a Sample Database Dataset + collections: + - name: users + description: User's information + fields: + - name: First_Name + description: A First Name Field + data_categories: + - user.provided.identifiable.name + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + - name: political_opinion + description: User's political opinion + data_categories: + - user.provided.identifiable.political_opinion + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized +system: + - fides_key: customer_data_sharing_system + name: Customer Data Sharing System + description: Share data about our users with third-parties for advertising + system_type: Service + privacy_declarations: + - name: Share Political Opinions + data_categories: + - user.provided.identifiable + data_use: advertising + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + data_subjects: + - customer + dataset_references: + - test_db_dataset_failing_dataset + system_dependencies: [] + +policy: + - fides_key: primary_privacy_policy + name: Primary Privacy Policy + description: The main privacy policy for the organization. 
+ rules: + - fides_key: reject_political_opinion + description: Disallow advertising of customer political opinion data + data_categories: + matches: ANY + values: + - user.provided.identifiable.political_opinion + data_uses: + matches: ANY + values: + - advertising + data_subjects: + matches: ANY + values: + - customer + data_qualifier: aggregated diff --git a/tests/data/failing_dataset_taxonomy.yml b/tests/data/failing_dataset_taxonomy.yml new file mode 100644 index 00000000..4b5f2528 --- /dev/null +++ b/tests/data/failing_dataset_taxonomy.yml @@ -0,0 +1,54 @@ +dataset: + - fides_key: test_db_dataset_failing_dataset + name: Sample DB Dataset + description: This is a Sample Database Dataset + data_categories: + - user.provided.identifiable.political_opinion + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized + collections: + - name: users + description: User's information + fields: + - name: First_Name + description: A First Name Field + data_categories: + - user.provided.identifiable.name + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + +system: + - fides_key: customer_data_sharing_system + name: Customer Data Sharing System + description: Share data about our users with third-parties for advertising + system_type: Service + privacy_declarations: + - name: Share Political Opinions + data_categories: + - user.provided.identifiable + data_use: advertising + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + data_subjects: + - customer + dataset_references: + - test_db_dataset_failing_dataset + system_dependencies: [] + +policy: + - fides_key: primary_privacy_policy + name: Primary Privacy Policy + description: The main privacy policy for the organization. + rules: + - fides_key: reject_political_opinion + description: Disallow advertising of customer political opinion data + data_categories: + matches: ANY + values: + - user.provided.identifiable.political_opinion + data_uses: + matches: ANY + values: + - advertising + data_subjects: + matches: ANY + values: + - customer + data_qualifier: aggregated diff --git a/tests/data/failing_declaration_taxonomy.yml b/tests/data/failing_declaration_taxonomy.yml new file mode 100644 index 00000000..a081fe8a --- /dev/null +++ b/tests/data/failing_declaration_taxonomy.yml @@ -0,0 +1,35 @@ +system: + - fides_key: customer_data_sharing_system + name: Customer Data Sharing System + description: Share data about our users with third-parties for payment processing + system_type: Service + privacy_declarations: + - name: Share Political Opinions + data_categories: + - user.provided.identifiable.political_opinion + data_use: third_party_sharing.payment_processing + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + data_subjects: + - customer + system_dependencies: [] + +policy: + - fides_key: primary_privacy_policy + name: Primary Privacy Policy + description: The main privacy policy for the organization. 
+    rules:
+      - name: reject_targeted_marketing
+        description: Disallow third party sharing of customer data
+        data_categories:
+          matches: ANY
+          values:
+            - user.provided
+        data_uses:
+          matches: ANY
+          values:
+            - third_party_sharing
+        data_subjects:
+          matches: ANY
+          values:
+            - customer
+        data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
diff --git a/tests/data/failing_nested_dataset.yml b/tests/data/failing_nested_dataset.yml
new file mode 100644
index 00000000..a0c733ef
--- /dev/null
+++ b/tests/data/failing_nested_dataset.yml
@@ -0,0 +1,57 @@
+dataset:
+  - fides_key: test_failing_nested_dataset_field
+    name: Sample Nested Dataset
+    description: Nested fields dataset with failure to be captured in evaluation
+    collections:
+      - name: organization
+        description: Organization information
+        fields:
+          - name: organization_name
+          - name: organization_address
+            fields:
+              - name: street
+                data_categories:
+                  - account.contact.street
+              - name: city
+                data_categories:
+                  - account.contact.city
+              - name: state
+                data_categories:
+                  - account.contact.state
+
+system:
+  - fides_key: client_analytics
+    name: Client Usage Analytics
+    description: Use aggregated and anonymous data to measure usage
+    system_type: Service
+    privacy_declarations:
+      - name: Measure usage of users
+        data_categories:
+          - user.derived
+        data_use: improve.system
+        data_subjects:
+          - customer
+        data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
+        dataset_references:
+          - test_failing_nested_dataset_field
+
+policy:
+  - fides_key: primary_privacy_policy
+    name: Primary Privacy Policy
+    description: The main privacy policy for the organization.
+    rules:
+      - name: exclude_location_information
+        description: Do not allow any contact information
+        data_categories:
+          matches: OTHER
+          values:
+            - user.derived
+        data_uses:
+          matches: OTHER
+          values:
+            - provide
+        data_subjects:
+          matches: OTHER
+          values:
+            - anonymous_user
+        data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
diff --git a/tests/data/passing_declaration_taxonomy.yml b/tests/data/passing_declaration_taxonomy.yml
new file mode 100644
index 00000000..59ab86d5
--- /dev/null
+++ b/tests/data/passing_declaration_taxonomy.yml
@@ -0,0 +1,35 @@
+system:
+  - fides_key: customer_data_sharing_system
+    name: Customer Data Sharing System
+    description: Share data about our users with third-parties for payment processing
+    system_type: Service
+    privacy_declarations:
+      - name: Share Political Opinions
+        data_categories:
+          - user.provided.identifiable.political_opinion
+        data_use: third_party_sharing.payment_processing
+        data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
+        data_subjects:
+          - customer
+    system_dependencies: []
+
+policy:
+  - fides_key: primary_privacy_policy
+    name: Primary Privacy Policy
+    description: The main privacy policy for the organization.
+ rules: + - name: reject_targeted_marketing + description: Disallow advertising of customer data + data_categories: + matches: ANY + values: + - user.provided + data_uses: + matches: ANY + values: + - advertising + data_subjects: + matches: ANY + values: + - customer + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/data/sample_hierarchy_figures.json b/tests/data/sample_hierarchy_figures.json new file mode 100644 index 00000000..2f77287f --- /dev/null +++ b/tests/data/sample_hierarchy_figures.json @@ -0,0 +1,937 @@ +{ + "data": [ + { + "hoverinfo": "skip", + "labels": [ + "account", + "account.contact", + "account.contact.city" + ], + "parents": [ + null, + "account", + "account.contact" + ], + "type": "sunburst" + }, + { + "hoverinfo": "skip", + "link": { + "source": [ + 0, + 1 + ], + "target": [ + 1, + 2 + ], + "value": [ + 1, + 2 + ] + }, + "node": { + "color": "blue", + "label": [ + "account", + "account.contact", + "account.contact.city" + ], + "line": { + "color": "black", + "width": 0.5 + }, + "pad": 15, + "thickness": 20 + }, + "type": "sankey", + "valueformat": ".1f", + "valuesuffix": "%", + "visible": false + }, + { + "hoverinfo": "skip", + "labels": [ + "account", + "account.contact", + "account.contact.city" + ], + "parents": [ + null, + "account", + "account.contact" + ], + "type": "icicle", + "visible": false + } + ], + "layout": { + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + 
"type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + 
"#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0.0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1.0, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": 
"" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Fides Data Category Hierarchy" + }, + "updatemenus": [ + { + "active": 0, + "buttons": [ + { + "args": [ + { + "visible": [ + true, + false, + false + ] + } + ], + "label": "Sunburst", + "method": "update" + }, + { + "args": [ + { + "visible": [ + false, + true, + false + ] + } + ], + "label": "Sankey", + "method": "update" + }, + { + "args": [ + { + "visible": [ + false, + false, + true + ] + } + ], + "label": "Icicle", + "method": "update" + } + ] + } + ] + } +} \ No newline at end of file diff --git a/tests/data/sample_manifest.yml b/tests/data/sample_manifest.yml new file mode 100644 index 00000000..7bc2d1e5 --- /dev/null +++ b/tests/data/sample_manifest.yml @@ -0,0 +1,13 @@ +id: 0 +name: sample2 +version: 0.0.1 +description: some description +purpose: security +fields: + - name: myemail + pii: work_email + - name: myotheremail + pii: personal_email + - name: prefs + pii: preferences +raw: none diff --git a/tests/fideslang/test_manifests.py b/tests/fideslang/test_manifests.py new file mode 100644 index 00000000..5310624c --- /dev/null +++ b/tests/fideslang/test_manifests.py @@ -0,0 +1,157 @@ +import pytest +import yaml + +from fideslang import manifests + + +# Helpers +@pytest.fixture() +def sample_manifest(): + yield manifests.load_yaml_into_dict("tests/data/sample_manifest.yml") + + +@pytest.fixture() +def ingestion_manifest_directory( + populated_manifest_dir, populated_nested_manifest_dir, request +): + """ + Allows for parameterization of manifests to ingest by returning + the corresponding fixture + """ + return { + "populated_manifest_dir": populated_manifest_dir, + "populated_nested_manifest_dir": populated_nested_manifest_dir, + }[request.param] + + +# Unit +@pytest.mark.unit +def test_load_yaml_into_dict(sample_manifest): + """ + Make sure that the yaml loaded from the sample manifest matches + what is expected. 
+ """ + expected_result = { + "id": 0, + "name": "sample2", + "version": "0.0.1", + "description": "some description", + "fields": [ + {"name": "myemail", "pii": "work_email"}, + {"name": "myotheremail", "pii": "personal_email"}, + {"name": "prefs", "pii": "preferences"}, + ], + "raw": "none", + "purpose": "security", + } + assert expected_result == sample_manifest + + +@pytest.mark.unit +def test_write_manifest(tmp_path): + test_resource = {"foo": "bar", "bar": "baz"} + expected_result = {"test": [{"foo": "bar", "bar": "baz"}]} + test_path = str(tmp_path) + "/test.yml" + manifests.write_manifest(test_path, test_resource, "test") + + with open(test_path, "r") as manifest: + actual_result = yaml.safe_load(manifest) + + assert actual_result == expected_result + + +@pytest.mark.unit +def test_union_manifests(test_manifests): + expected_result = { + "dataset": [ + { + "name": "Test Dataset 1", + "description": "Test Dataset 1", + "fides_key": "some_dataset", + "organization_fides_key": 1, + "datasetType": {}, + "datasetLocation": "somedb:3306", + "datasetTables": [], + }, + { + "name": "Test Dataset 2", + "description": "Test Dataset 2", + "fides_key": "another_dataset", + "organization_fides_key": 1, + "datasetType": {}, + "datasetLocation": "somedb:3306", + "datasetTables": [], + }, + ], + "system": [ + { + "name": "Test System 1", + "organization_fides_key": 1, + "systemType": "mysql", + "description": "Test System 1", + "fides_key": "some_system", + }, + { + "name": "Test System 2", + "organization_fides_key": 1, + "systemType": "mysql", + "description": "Test System 2", + "fides_key": "another_system", + }, + ], + } + actual_result = manifests.union_manifests(test_manifests.values()) + print(expected_result) + print(actual_result) + assert expected_result == actual_result + + +@pytest.mark.unit +@pytest.mark.parametrize( + "ingestion_manifest_directory", + ["populated_manifest_dir", "populated_nested_manifest_dir"], + indirect=["ingestion_manifest_directory"], +) +def test_ingest_manifests(ingestion_manifest_directory): + actual_result = manifests.ingest_manifests(str(ingestion_manifest_directory)) + + # Battery of assertions for consistency + assert sorted(actual_result) == ["dataset", "system"] + assert len(actual_result["dataset"]) == 2 + assert len(actual_result["system"]) == 2 + assert sorted(actual_result["dataset"], key=lambda x: x["name"]) == [ + { + "name": "Test Dataset 1", + "organization_fides_key": 1, + "datasetType": {}, + "datasetLocation": "somedb:3306", + "description": "Test Dataset 1", + "fides_key": "some_dataset", + "datasetTables": [], + }, + { + "name": "Test Dataset 2", + "description": "Test Dataset 2", + "organization_fides_key": 1, + "datasetType": {}, + "datasetLocation": "somedb:3306", + "fides_key": "another_dataset", + "datasetTables": [], + }, + ] + assert sorted(actual_result["system"], key=lambda x: x["name"]) == [ + { + "name": "Test System 1", + "organization_fides_key": 1, + "systemType": "mysql", + "description": "Test System 1", + "fides_key": "some_system", + }, + { + "name": "Test System 2", + "organization_fides_key": 1, + "systemType": "mysql", + "description": "Test System 2", + "fides_key": "another_system", + }, + ] diff --git a/tests/fideslang/test_parse.py b/tests/fideslang/test_parse.py new file mode 100644 index 00000000..b646f466 --- /dev/null +++ b/tests/fideslang/test_parse.py @@ -0,0 +1,83 @@ +import pytest + +import fideslang as models +from fideslang import parse + + +@pytest.mark.unit +def test_parse_manifest(): + expected_result = 
models.DataCategory( + organization_fides_key=1, + fides_key="some_resource", + name="Test resource 1", + description="Test Description", + ) + test_dict = { + "organization_fides_key": 1, + "fides_key": "some_resource", + "name": "Test resource 1", + "description": "Test Description", + } + actual_result = parse.parse_dict("data_category", test_dict) + assert actual_result == expected_result + + +@pytest.mark.unit +def test_parse_manifest_no_fides_key_validation_error(): + with pytest.raises(SystemExit): + test_dict = { + "organization_fides_key": 1, + "name": "Test resource 1", + "description": "Test Description", + } + parse.parse_dict("data_category", test_dict) + assert True + + +@pytest.mark.unit +def test_parse_manifest_resource_type_error(): + with pytest.raises(SystemExit): + test_dict = { + "organization_fides_key": 1, + "fides_key": "some_resource", + "name": "Test resource 1", + "description": "Test Description", + } + parse.parse_dict("data-category", test_dict) + assert True + + +@pytest.mark.unit +def test_load_manifests_into_taxonomy(): + manifest_dict = { + "data_category": [ + { + "name": "User Data", + "fides_key": "user", + "description": "Test top-level category", + }, + { + "name": "User Provided Data", + "fides_key": "user.provided", + "parent_key": "user", + "description": "Test sub-category", + }, + ] + } + + expected_taxonomy = models.Taxonomy( + data_category=[ + models.DataCategory( + name="User Data", + fides_key="user", + description="Test top-level category", + ), + models.DataCategory( + name="User Provided Data", + fides_key="user.provided", + parent_key="user", + description="Test sub-category", + ), + ] + ) + assert parse.load_manifests_into_taxonomy(manifest_dict) == expected_taxonomy diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py new file mode 100644 index 00000000..c65f87e5 --- /dev/null +++ b/tests/fideslang/test_relationships.py @@ -0,0 +1,184 @@ +import pytest + +from fideslang import relationships +from fideslang.models import ( + DataCategory, + Dataset, + DatasetCollection, + DatasetField, + MatchesEnum, + Policy, + PolicyRule, + PrivacyDeclaration, + System, + Taxonomy, +) + + +@pytest.mark.unit +def test_find_referenced_fides_keys_1(): + test_data_category = DataCategory( + name="test_dc", + fides_key="key_1.test_dc", + description="test description", + parent_key="key_1", + ) + expected_referenced_key = {"key_1", "key_1.test_dc", "default_organization"} + referenced_keys = relationships.find_referenced_fides_keys(test_data_category) + assert referenced_keys == set(expected_referenced_key) + + +@pytest.mark.unit +def test_find_referenced_fides_keys_2(): + test_system = System.construct( + name="test_dc", + fides_key="test_dc", + description="test description", + system_dependencies=["key_1", "key_2"], + system_type="test", + privacy_declarations=None, + ) + expected_referenced_key = {"key_1", "key_2", "test_dc", "default_organization"} + referenced_keys = relationships.find_referenced_fides_keys(test_system) + assert referenced_keys == set(expected_referenced_key) + + +@pytest.mark.unit +def test_get_referenced_missing_keys(): + taxonomy = Taxonomy( + data_category=[ + DataCategory( + name="test_dc", + fides_key="key_1.test_dc", + description="test description", + parent_key="key_1", + ), + DataCategory( + name="test_dc2", + fides_key="key_1.test_dc2", + description="test description", + parent_key="key_1", + ), + ], + system=[ + System.construct( + name="test_system", + fides_key="test_system", + 
description="test description", + system_dependencies=["key_3", "key_4"], + system_type="test", + privacy_declarations=None, + ) + ], + ) + expected_referenced_key = {"key_1", "key_3", "key_4", "default_organization"} + referenced_keys = relationships.get_referenced_missing_keys(taxonomy) + assert sorted(referenced_keys) == sorted(set(expected_referenced_key)) + + +@pytest.mark.unit +def test_get_referenced_missing_privacy_declaration_keys(): + taxonomy = Taxonomy( + system=[ + System( + fides_key="system_1", + system_type="system_type_1", + privacy_declarations=[ + PrivacyDeclaration( + name="privacy_declaration_1", + data_categories=["privacy_declaration_data_category_1"], + data_use="privacy_declaration_data_use_1", + data_qualifier="privacy_declaration_data_qualifier_1", + data_subjects=["privacy_declaration_data_subject_1"], + dataset_references=["privacy_declaration_data_set_1"], + ) + ], + ) + ] + ) + expected_referenced_key = { + "default_organization", + "privacy_declaration_data_category_1", + "privacy_declaration_data_use_1", + "privacy_declaration_data_qualifier_1", + "privacy_declaration_data_subject_1", + "privacy_declaration_data_set_1", + } + referenced_keys = relationships.get_referenced_missing_keys(taxonomy) + assert sorted(referenced_keys) == sorted(set(expected_referenced_key)) + + +@pytest.mark.unit +def test_get_referenced_missing_policy_keys(): + taxonomy = Taxonomy( + policy=[ + Policy( + fides_key="policy_1", + rules=[ + PolicyRule( + name="policy_rule_1", + data_categories={ + "values": ["policy_rule_data_category_1"], + "matches": MatchesEnum.ANY, + }, + data_uses={ + "values": ["policy_rule_data_use_1"], + "matches": MatchesEnum.ANY, + }, + data_subjects={ + "values": ["policy_rule_data_subject_1"], + "matches": MatchesEnum.ANY, + }, + data_qualifier="policy_rule_data_qualifier_1", + ) + ], + ) + ], + ) + expected_referenced_key = { + "default_organization", + "policy_rule_data_category_1", + "policy_rule_data_use_1", + "policy_rule_data_subject_1", + "policy_rule_data_qualifier_1", + } + referenced_keys = relationships.get_referenced_missing_keys(taxonomy) + assert sorted(referenced_keys) == sorted(set(expected_referenced_key)) + + +@pytest.mark.unit +def test_get_referenced_missing_dataset_keys(): + taxonomy = Taxonomy( + dataset=[ + Dataset( + fides_key="dataset_1", + data_qualifier="dataset_qualifier_1", + data_categories=["dataset_data_category_1"], + collections=[ + DatasetCollection( + name="dataset_collection_1", + data_qualifier="data_collection_data_qualifier_1", + data_categories=["dataset_collection_data_category_1"], + fields=[ + DatasetField( + name="dataset_field_1", + data_categories=["dataset_field_data_category_1"], + data_qualifier="dataset_field_data_qualifier_1", + ) + ], + ) + ], + ) + ], + ) + expected_referenced_key = { + "default_organization", + "dataset_qualifier_1", + "dataset_data_category_1", + "data_collection_data_qualifier_1", + "dataset_collection_data_category_1", + "dataset_field_data_category_1", + "dataset_field_data_qualifier_1", + } + referenced_keys = relationships.get_referenced_missing_keys(taxonomy) + assert sorted(referenced_keys) == sorted(set(expected_referenced_key)) diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py new file mode 100644 index 00000000..3babf4c1 --- /dev/null +++ b/tests/fideslang/test_validation.py @@ -0,0 +1,278 @@ +import pytest +from pydantic import ValidationError + +from fideslang.models import ( + DataCategory, + DataUse, + FidesModel, + Policy, + 
PolicyRule, + PrivacyDeclaration, + PrivacyRule, + System, +) + +from fideslang.validation import FidesValidationError, check_valid_country_code + + +@pytest.mark.unit +def test_top_level_resource(): + DataCategory( + organization_fides_key=1, + fides_key="user", + name="Custom Test Data", + description="Custom Test Data Category", + ) + assert DataCategory + + +@pytest.mark.unit +def test_fides_key_doesnt_match_stated_parent_key(): + with pytest.raises(FidesValidationError): + DataCategory( + organization_fides_key=1, + fides_key="user.provided.identifiable.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + parent_key="user.derived", + ) + assert DataCategory + + +@pytest.mark.unit +def test_fides_key_matches_stated_parent_key(): + DataCategory( + organization_fides_key=1, + fides_key="user.provided.identifiable.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + parent_key="user.provided.identifiable", + ) + assert DataCategory + + +@pytest.mark.unit +def test_no_parent_key_but_fides_key_contains_parent_key(): + with pytest.raises(FidesValidationError): + DataCategory( + organization_fides_key=1, + fides_key="user.provided.identifiable.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + ) + assert DataCategory + + +@pytest.mark.unit +def test_create_valid_data_category(): + DataCategory( + organization_fides_key=1, + fides_key="user.provided.identifiable.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + parent_key="user.provided.identifiable", + ) + assert DataCategory + + +@pytest.mark.unit +def test_circular_dependency_data_category(): + with pytest.raises(FidesValidationError): + DataCategory( + organization_fides_key=1, + fides_key="user.provided.identifiable", + name="User Provided Identifiable Data", + description="Test Data Category", + parent_key="user.provided.identifiable", + ) + assert True + + +@pytest.mark.unit +def test_create_valid_data_use(): + DataUse( + organization_fides_key=1, + fides_key="provide.system", + name="Provide the Product or Service", + parent_key="provide", + description="Test Data Use", + ) + assert True + + +@pytest.mark.unit +def test_circular_dependency_data_use(): + with pytest.raises(FidesValidationError): + DataUse( + organization_fides_key=1, + fides_key="provide.system", + name="Provide the Product or Service", + description="Test Data Use", + parent_key="provide.system", + ) + assert True + + +@pytest.mark.unit +@pytest.mark.parametrize("fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"]) +def test_fides_model_valid(fides_key: str): + fides_key = FidesModel(fides_key=fides_key, name="Foo Bar") + assert fides_key + + +@pytest.mark.unit +@pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) +def test_fides_model_fides_key_invalid(fides_key): + "Check for a bunch of different possible bad characters here." 
+ with pytest.raises(FidesValidationError): + FidesModel(fides_key=fides_key) + + +@pytest.mark.unit +def test_valid_privacy_rule(): + privacy_rule = PrivacyRule(matches="ANY", values=["foo_bar"]) + assert privacy_rule + + +@pytest.mark.unit +def test_invalid_fides_key_privacy_rule(): + with pytest.raises(FidesValidationError): + PrivacyRule(matches="ANY", values=["foo^bar"]) + assert True + + +@pytest.mark.unit +def test_invalid_matches_privacy_rule(): + with pytest.raises(ValidationError): + PrivacyRule(matches="AN", values=["foo_bar"]) + assert True + + +@pytest.mark.unit +def test_valid_policy_rule(): + assert PolicyRule( + organization_fides_key=1, + policyId=1, + fides_key="test_policy", + name="Test Policy", + description="Test Policy", + data_categories=PrivacyRule(matches="NONE", values=[]), + data_uses=PrivacyRule(matches="NONE", values=["provide.system"]), + data_subjects=PrivacyRule(matches="ANY", values=[]), + data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", + ) + + +@pytest.mark.unit +def test_valid_policy(): + Policy( + organization_fides_key=1, + fides_key="test_policy", + name="Test Policy", + version="1.3", + description="Test Policy", + rules=[], + ) + assert True + + +@pytest.mark.unit +def test_create_valid_system(): + System( + organization_fides_key=1, + registryId=1, + fides_key="test_system", + system_type="SYSTEM", + name="Test System", + description="Test Policy", + privacy_declarations=[ + PrivacyDeclaration( + name="declaration-name", + data_categories=[], + data_use="provide.system", + data_subjects=[], + data_qualifier="aggregated_data", + dataset_references=[], + ) + ], + system_dependencies=["another_system", "yet_another_system"], + ) + assert True + + +@pytest.mark.unit +def test_circular_dependency_system(): + with pytest.raises(FidesValidationError): + System( + organization_fides_key=1, + registryId=1, + fides_key="test_system", + system_type="SYSTEM", + name="Test System", + description="Test Policy", + privacy_declarations=[ + PrivacyDeclaration( + name="declaration-name", + data_categories=[], + data_use="provide.system", + data_subjects=[], + data_qualifier="aggregated_data", + dataset_references=["test_system"], + ) + ], + system_dependencies=["test_system"], + ) + assert True + + +@pytest.mark.unit +@pytest.mark.parametrize("country_code", ["United States", "US", "usa"]) +def test_invalid_country_identifier(country_code: str): + "Validate some invalid country identifiers raise an error" + with pytest.raises(FidesValidationError): + System( + organization_fides_key=1, + registryId=1, + fides_key="test_system", + system_type="SYSTEM", + name="Test System", + description="Test Policy", + third_country_transfers=[country_code], + privacy_declarations=[ + PrivacyDeclaration( + name="declaration-name", + data_categories=[], + data_use="provide.system", + data_subjects=[], + data_qualifier="aggregated_data", + dataset_references=["test_system"], + ) + ], + ) + assert True + + +@pytest.mark.unit +@pytest.mark.parametrize("country_code", ["CAN", "USA", "GBR"]) +def test_valid_country_identifier(country_code: str): + "Validates usage of alpha-3 codes per ISO 3166" + System( + organization_fides_key=1, + registryId=1, + fides_key="test_system", + system_type="SYSTEM", + name="Test System", + description="Test Policy", + third_country_transfers=[country_code], + privacy_declarations=[ + PrivacyDeclaration( + name="declaration-name", + data_categories=[], + data_use="provide.system", + data_subjects=[], + 
data_qualifier="aggregated_data", + dataset_references=["test_system"], + ) + ], + ) + assert True diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 00000000..6a5d1f4a --- /dev/null +++ b/versioneer.py @@ -0,0 +1,1648 @@ +# Version: 0.19 +# pylint: skip-file + +"""The Versioneer - like a rocketeer, but for versions.""" + +import configparser +import errno +import json +import os +import re +import subprocess +import sys + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . + """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." + ) + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + me = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(me)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py) + ) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise EnvironmentError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . 
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as f: + parser.read_file(f) + VCS = parser.get("versioneer", "VCS") # mandatory + + def get(parser, name): + if parser.has_option("versioneer", name): + return parser.get("versioneer", name) + return None + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = get(parser, "style") or "" + cfg.versionfile_source = get(parser, "versionfile_source") + cfg.versionfile_build = get(parser, "versionfile_build") + cfg.tag_prefix = get(parser, "tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = get(parser, "parentdir_prefix") + cfg.verbose = get(parser, "verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +LONG_VERSION_PY[ + "git" +] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%%s*" %% tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], + cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r"\d", r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + f = open(".gitattributes", "r") + for line in f.readlines(): + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + f.close() + except EnvironmentError: + pass + if not present: + f = open(".gitattributes", "a+") + f.write("%s export-subst\n" % versionfile_source) + f.close() + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.19) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except EnvironmentError: + raise NotThisMethod("unable to read _version.py") + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) + if not mo: + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
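+    # Each strategy below either returns a complete version dict of the form
+    #     {"version": ..., "full-revisionid": ..., "dirty": ..., "error": ..., "date": ...}
+    # or raises NotThisMethod, in which case the next strategy is tried.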
+ + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + print("unable to compute version") + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(cmdclass=None): + """Get the custom setuptools/distutils subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 + + cmds = {} if cmdclass is None else cmdclass.copy() + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? 
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if "build_py" in cmds: + _build_py = cmds["build_py"] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_py"] = cmd_build_py + + if "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if "py2exe" in sys.modules: # py2exe enabled? 
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if "sdist" in cmds: + _sdist = cmds["sdist"] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. 
+ +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +INIT_PY_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except EnvironmentError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
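+    # For a hypothetical versionfile_source of "src/mypkg/_version.py" this
+    # ensures .gitattributes contains the line
+    #     src/mypkg/_version.py export-subst
+    # so that 'git archive' expands the version keywords in that file.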
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1)
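
For reference, a minimal sketch of what the version styles implemented in
versioneer.py above would produce; it assumes this versioneer.py is importable
from the repository root, and the tag, distance, and commit hash in "pieces"
are invented for illustration:

    import versioneer

    # Invented example data in the shape produced by git_pieces_from_vcs().
    pieces = {
        "closest-tag": "1.0.0",
        "distance": 3,
        "short": "abc1234",
        "long": "abc1234" + "0" * 33,
        "dirty": True,
        "error": None,
        "date": "2022-04-26T21:44:03-0500",
    }

    for style in ("pep440", "pep440-post", "git-describe"):
        print(style, "->", versioneer.render(pieces, style)["version"])

    # Expected output:
    #   pep440 -> 1.0.0+3.gabc1234.dirty
    #   pep440-post -> 1.0.0.post3.dev0+gabc1234
    #   git-describe -> 1.0.0-3-gabc1234-dirty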