diff --git a/backend/Dockerfile b/backend/Dockerfile index 9ad7543e..fe161b75 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -49,5 +49,7 @@ COPY parser /parser USER off:off COPY --chown=off:off ./backend/editor /code/editor +COPY --chown=off:off ./backend/sample /code/sample +RUN find /code/sample -type f -name '*.py' -exec chmod +x {} \; CMD ["uvicorn", "editor.api:app", "--host", "0.0.0.0", "--port", "80"] \ No newline at end of file diff --git a/backend/editor/api.py b/backend/editor/api.py index ad1c6f92..aa66a3e7 100644 --- a/backend/editor/api.py +++ b/backend/editor/api.py @@ -398,7 +398,6 @@ async def upload_taxonomy( """ Upload taxonomy file to be parsed """ - # use the file name as the taxonomy name taxonomy = TaxonomyGraph(branch, taxonomy_name) if not taxonomy.is_valid_branch_name(): raise HTTPException(status_code=422, detail="branch_name: Enter a valid branch name!") diff --git a/backend/editor/entries.py b/backend/editor/entries.py index 8a188a4d..5a39fce9 100644 --- a/backend/editor/entries.py +++ b/backend/editor/entries.py @@ -12,7 +12,7 @@ from .exceptions import GithubBranchExistsError # Custom exceptions from .exceptions import ( GithubUploadError, - TaxnonomyImportError, + TaxonomyImportError, TaxonomyParsingError, TaxonomyUnparsingError, ) @@ -124,7 +124,7 @@ async def import_from_github(self, description): return status except Exception as e: - raise TaxnonomyImportError() from e + raise TaxonomyImportError() from e async def upload_taxonomy(self, filepath, description): """ @@ -136,7 +136,7 @@ async def upload_taxonomy(self, filepath, description): await self.create_project(description) return status except Exception as e: - raise TaxnonomyImportError() from e + raise TaxonomyImportError() from e def dump_taxonomy(self): """ @@ -614,11 +614,11 @@ async def delete_taxonomy_project(self, branch, taxonomy_name): """ delete_query = """ - MATCH (n:PROJECT {taxonomy_name: $taxonomy_name, branch: $branch}) + MATCH (n:PROJECT 
{taxonomy_name: $taxonomy_name, branch_name: $branch_name}) DELETE n """ result = await get_current_transaction().run( - delete_query, taxonomy_name=taxonomy_name, branch=branch + delete_query, taxonomy_name=taxonomy_name, branch_name=branch ) summary = await result.consume() count = summary.counters.nodes_deleted diff --git a/backend/editor/exceptions.py b/backend/editor/exceptions.py index e3f7c453..92a73387 100644 --- a/backend/editor/exceptions.py +++ b/backend/editor/exceptions.py @@ -23,7 +23,7 @@ def __init__(self): return super().__init__(exception_message) -class TaxnonomyImportError(RuntimeError): +class TaxonomyImportError(RuntimeError): """ Raised when attempting to fetch a taxonomy from GitHub """ diff --git a/backend/editor/github_functions.py b/backend/editor/github_functions.py index 4c0e683e..430c7975 100644 --- a/backend/editor/github_functions.py +++ b/backend/editor/github_functions.py @@ -3,8 +3,8 @@ """ from textwrap import dedent +import github from fastapi import HTTPException -from github import Github, GithubException from . import settings @@ -33,7 +33,7 @@ def init_driver_and_repo(self): raise HTTPException( status_code=400, detail="repo_uri is not set. Please add your access token in .env" ) - github_driver = Github(access_token) + github_driver = github.Github(access_token) repo = github_driver.get_repo(repo_uri) return repo @@ -72,7 +72,7 @@ def update_file(self, filename): current_file.sha, branch=self.branch_name, ) - except GithubException as e: + except github.GithubException as e: # Handle GitHub API-related exceptions raise Exception(f"GitHub API error: {e}") from e except FileNotFoundError as e: diff --git a/backend/poetry.lock b/backend/poetry.lock index acbfaa8a..3490c686 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. 
[[package]] name = "annotated-types" version = "0.6.0" description = "Reusable constraint types to use with typing.Annotated" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -16,7 +15,6 @@ files = [ name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -37,7 +35,6 @@ trio = ["trio (<0.22)"] name = "black" version = "23.10.1" description = "The uncompromising code formatter." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -78,7 +75,6 @@ uvloop = ["uvloop (>=0.15.2)"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -90,7 +86,6 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -155,7 +150,6 @@ pycparser = "*" name = "charset-normalizer" version = "2.1.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -170,7 +164,6 @@ unicode-backport = ["unicodedata2"] name = "click" version = "8.1.7" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -185,7 +178,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -197,7 +189,6 @@ files = [ name = "cryptography" version = "41.0.5" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -243,7 +234,6 @@ test-randomorder = ["pytest-randomly"] name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -261,7 +251,6 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] name = "dnspython" version = "2.4.2" description = "DNS toolkit" -category = "main" optional = false python-versions = ">=3.8,<4.0" files = [ @@ -281,7 +270,6 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] name = "email-validator" version = "2.1.0.post1" description = "A robust email address syntax and deliverability validation library." 
-category = "main" optional = false python-versions = ">=3.8" files = [ @@ -297,7 +285,6 @@ idna = ">=2.0.0" name = "fastapi" version = "0.104.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -329,7 +316,6 @@ all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)" name = "flake8" version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -346,7 +332,6 @@ pyflakes = ">=3.1.0,<3.2.0" name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -358,7 +343,6 @@ files = [ name = "httpcore" version = "1.0.1" description = "A minimal low-level HTTP client." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -373,14 +357,13 @@ h11 = ">=0.13,<0.15" [package.extras] asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (>=1.0.0,<2.0.0)"] +socks = ["socksio (==1.*)"] trio = ["trio (>=0.22.0,<0.23.0)"] [[package]] name = "httptools" version = "0.6.1" description = "A collection of framework independent HTTP protocol utils." -category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -429,7 +412,6 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "httpx" version = "0.25.1" description = "The next generation HTTP client." 
-category = "main" optional = false python-versions = ">=3.8" files = [ @@ -446,15 +428,14 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (>=1.0.0,<2.0.0)"] +socks = ["socksio (==1.*)"] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -466,7 +447,6 @@ files = [ name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -478,7 +458,6 @@ files = [ name = "iso-639" version = "0.4.5" description = "Python library for ISO 639 standard" -category = "main" optional = false python-versions = "*" files = [ @@ -489,7 +468,6 @@ files = [ name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." -category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -507,7 +485,6 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itsdangerous" version = "2.1.2" description = "Safely pass data to untrusted environments and back." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -519,7 +496,6 @@ files = [ name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -537,7 +513,6 @@ i18n = ["Babel (>=2.7)"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -607,7 +582,6 @@ files = [ name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -619,7 +593,6 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." -category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -631,7 +604,6 @@ files = [ name = "neo4j" version = "5.14.0" description = "Neo4j Bolt driver for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -650,7 +622,6 @@ pyarrow = ["pyarrow (>=1.0.0)"] name = "openfoodfacts-taxonomy-parser" version = "0.1.0" description = "Taxonomy Parser written in Python for Open Food Facts" -category = "main" optional = false python-versions = "^3.11" files = [] @@ -668,7 +639,6 @@ url = "../parser" name = "orjson" version = "3.9.10" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -728,7 +698,6 @@ files = [ name = "packaging" version = "23.2" description = "Core utilities for Python packages" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -740,7 +709,6 @@ files = [ name = "pathspec" version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -752,7 +720,6 @@ files = [ name = "platformdirs" version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -768,7 +735,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "pluggy" version = "1.3.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -784,7 +750,6 @@ testing = ["pytest", "pytest-benchmark"] name = "pycodestyle" version = "2.11.1" description = "Python style guide checker" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -796,7 +761,6 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -808,7 +772,6 @@ files = [ name = "pydantic" version = "2.4.2" description = "Data validation using Python type hints" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -828,7 +791,6 @@ email = ["email-validator (>=2.0.0)"] name = "pydantic-core" version = "2.10.1" description = "" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -947,7 +909,6 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" name = "pydantic-extra-types" version = "2.1.0" description = "Extra Pydantic types." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -965,7 +926,6 @@ all = ["phonenumbers (>=8,<9)", "pycountry (>=22,<23)"] name = "pydantic-settings" version = "2.0.3" description = "Settings management using Pydantic" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -981,7 +941,6 @@ python-dotenv = ">=0.21.0" name = "pyflakes" version = "3.1.0" description = "passive checker of Python programs" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -993,7 +952,6 @@ files = [ name = "pygithub" version = "2.1.1" description = "Use the full Github API v3" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1014,7 +972,6 @@ urllib3 = ">=1.26.0" name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1035,7 +992,6 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pynacl" version = "1.5.0" description = "Python binding to the Networking and Cryptography (NaCl) library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1062,7 +1018,6 @@ tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] name = "pytest" version = "7.4.3" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1079,11 +1034,27 @@ pluggy = ">=0.12,<2.0" [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-mock" +version = "3.12.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, + {file = "pytest_mock-3.12.0-py3-none-any.whl", 
hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, +] + +[package.dependencies] +pytest = ">=5.0" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + [[package]] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -1098,7 +1069,6 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1113,7 +1083,6 @@ cli = ["click (>=5.0)"] name = "python-multipart" version = "0.0.6" description = "A streaming multipart parser for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1128,7 +1097,6 @@ dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatc name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -1140,7 +1108,6 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1200,7 +1167,6 @@ files = [ name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1222,7 +1188,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1234,7 +1199,6 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1246,7 +1210,6 @@ files = [ name = "starlette" version = "0.27.0" description = "The little ASGI library that shines." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1264,7 +1227,6 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam name = "typing-extensions" version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1276,7 +1238,6 @@ files = [ name = "ujson" version = "5.8.0" description = "Ultra fast JSON encoder and decoder for Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1347,7 +1308,6 @@ files = [ name = "unidecode" version = "1.3.7" description = "ASCII transliterations of Unicode text" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1359,7 +1319,6 @@ files = [ name = "urllib3" version = "2.0.7" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1377,7 +1336,6 @@ zstd = ["zstandard (>=0.18.0)"] name = "uvicorn" version = "0.23.2" description = "The lightning-fast ASGI server." 
-category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1392,7 +1350,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -1403,7 +1361,6 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.19.0" description = "Fast implementation of asyncio event loop on top of libuv" -category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -1448,7 +1405,6 @@ test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)" name = "watchfiles" version = "0.21.0" description = "Simple, modern and high performance file watching and code reload in python." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1536,7 +1492,6 @@ anyio = ">=3.0.0" name = "websockets" version = "12.0" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1618,7 +1573,6 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." 
#!/usr/bin/env python3
"""Dump a Neo4J database to a JSON file.

The output is a single JSON object with two keys:

* ``nodes``: one object per node, holding the node's properties plus a
  ``labels`` list;
* ``relations``: one single-key object per relationship, of the form
  ``{relationship type: [start node "id", end node "id"]}``.
"""
import argparse
import json
import os

DEFAULT_URL = os.environ.get("NEO4J_URI", "bolt://localhost:7687")

NODES_QUERY = "MATCH (n) RETURN n"
# Return the relationship type and the "id" property of both end nodes in one
# row, so no extra look-up query is needed per relationship.
RELATIONS_QUERY = (
    "MATCH (n)-[r]->(m) "
    "RETURN type(r) AS rel_type, n.id AS start_id, m.id AS end_id"
)


def _driver(uri):
    """Create a Neo4J driver for *uri*.

    The driver package is imported here rather than at module level so that
    argument parsing and the dump helpers stay importable without it.
    """
    from neo4j import GraphDatabase

    return GraphDatabase.driver(uri)


def _write_json_items(file, items, **dumps_kwargs):
    """Write *items* to *file* as comma-separated JSON values (no brackets).

    The separator is written before every item except the first, so the
    output is well-formed whatever the number of items turns out to be.
    """
    for index, item in enumerate(items):
        if index:
            file.write(",")
        file.write(json.dumps(item, ensure_ascii=False, **dumps_kwargs))


def get_session(uri=DEFAULT_URL):
    """Get a session object for the Neo4J database.

    The caller is responsible for closing the returned session.
    """
    return _driver(uri).session()


def dump_nodes(session, file):
    """Dump all nodes from the database to a JSON file.

    Args:
        session: object whose ``run(query)`` yields records exposing the node
            under the ``"n"`` key.
        file: text file-like object; receives comma-separated JSON objects.
    """

    def node_dicts():
        for record in session.run(NODES_QUERY):
            node = record["n"]
            node_dict = dict(node)
            node_dict["labels"] = list(node.labels)
            yield node_dict

    # default=str: property values json cannot encode natively are written
    # as their string form.
    _write_json_items(file, node_dicts(), default=str)


def dump_relations(session, file):
    """Dump all relationships from the database to a JSON file.

    Each relationship is written as ``{type: [start_id, end_id]}`` where the
    ids are the ``id`` property of the start and end nodes (``null`` when a
    node has no such property).

    Args:
        session: object whose ``run(query)`` yields records with the keys
            ``rel_type``, ``start_id`` and ``end_id``.
        file: text file-like object; receives comma-separated JSON objects.
    """
    relations = (
        {record["rel_type"]: [record["start_id"], record["end_id"]]}
        for record in session.run(RELATIONS_QUERY)
    )
    _write_json_items(file, relations)


def get_options(args=None):
    """Parse command line arguments.

    Args:
        args: list of argument strings; ``None`` makes argparse read
            ``sys.argv``.

    Returns:
        The parsed namespace, with ``url`` and ``file`` attributes.
    """
    parser = argparse.ArgumentParser(description="Dump Neo4J database to JSON file")
    parser.add_argument("--url", default=DEFAULT_URL, help="Neo4J database bolt URL")
    parser.add_argument("file", help="JSON file name to dump")
    return parser.parse_args(args)


def main(args=None):
    """Dump the database selected by the command line options to a file."""
    options = get_options(args)
    # Context managers close the session and the driver even if a dump fails.
    with _driver(options.url) as driver, driver.session() as session:
        # The dump keeps non-ASCII characters (ensure_ascii=False), so the
        # encoding is pinned instead of depending on the locale.
        with open(options.file, "w", encoding="utf-8") as f:
            f.write('{"nodes": [')
            dump_nodes(session, f)
            f.write('], "relations": [')
            dump_relations(session, f)
            f.write("]}")


if __name__ == "__main__":
    main()
["fruit-passion", "fruits-passion", "maracuja", "passion"], + "is_before": "synonyms:0", + "preceding_lines": [""], + "src_position": 7, + "tags_fr": [ + "fruit de la passion", + "fruits de la passion", + "maracuja", + "passion" + ], + "id": "synonyms:1", + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "SYNONYMS"] + }, + { + "tags_ids_fr": ["yaourts", "yoghourts", "yogourts"], + "prop_color_en": " white", + "preceding_lines": [], + "src_position": 9, + "tags_fr": ["yaourts", "yoghourts", "yogourts"], + "tags_ids_en": ["yogurts", "yoghurts"], + "tags_ids_nl": ["yoghurts"], + "main_language": "en", + "tags_en": ["yogurts", "yoghurts"], + "is_before": "synonyms:1", + "prop_flavour_en": " undef", + "prop_description_en": " a yogurts of whatever type", + "prop_description_fr": " un yaourt de n'importe quel type", + "id": "en:yogurts", + "tags_nl": ["yoghurts"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-banane"], + "prop_color_en": " yellow", + "preceding_lines": [], + "src_position": 17, + "tags_fr": ["yaourts à la banane"], + "tags_ids_en": ["banana-yogurts"], + "tags_ids_nl": ["bananenyoghurt"], + "main_language": "en", + "tags_en": ["banana yogurts"], + "is_before": "en:yogurts", + "prop_flavour_en": " banana", + "prop_description_en": " a banana yogurt", + "prop_description_fr": " un yaourt à la banane", + "id": "en:banana-yogurts", + "tags_nl": ["bananenyoghurt"], + "parents": ["en:yogurts"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-fruit-passion"], + "prop_color_en": " undef", + "preceding_lines": [], + "src_position": 26, + "tags_fr": ["yaourts au fruit de la passion"], + "tags_ids_en": ["passion-fruit-yogurts"], + "tags_ids_nl": ["yoghurts-met-passievrucht"], + "main_language": "en", + "tags_en": ["Passion fruit yogurts"], + "is_before": "en:banana-yogurts", + "prop_flavour_en": " passion fruit", + "id": 
"en:passion-fruit-yogurts", + "tags_nl": ["yoghurts met passievrucht"], + "parents": ["en:yogurts"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-alleges"], + "main_language": "fr", + "is_before": "en:passion-fruit-yogurts", + "preceding_lines": [], + "src_position": 33, + "tags_fr": ["yaourts allégés"], + "id": "fr:yaourts-alleges", + "parents": ["en:yogurts"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-citron"], + "prop_color_en": " yellow", + "preceding_lines": [], + "src_position": 36, + "tags_fr": ["yaourts au citron"], + "tags_ids_en": ["lemon-yogurts"], + "main_language": "en", + "tags_ids_nl": ["yoghurts-met-citroen"], + "tags_en": ["lemon yogurts"], + "is_before": "fr:yaourts-alleges", + "prop_flavour_en": " lemon", + "prop_description_en": " a yogurts with lemon inside", + "prop_description_fr": " un yaourt avec du citron", + "id": "en:lemon-yogurts", + "tags_nl": ["yoghurts met citroen"], + "parents": ["fr:yoghourts"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-fruit-passion-alleges"], + "main_language": "fr", + "tags_ids_nl": ["magere-yoghurts-met-passievrucht"], + "is_before": "en:lemon-yogurts", + "preceding_lines": [], + "src_position": 45, + "tags_fr": ["yaourts au fruit de la passion allégés"], + "id": "fr:yaourts-fruit-passion-alleges", + "tags_nl": ["magere yoghurts met passievrucht"], + "parents": ["fr:yaourts-fruit-passion", "fr:yaourts-alleges"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-citron-alleges"], + "main_language": "fr", + "tags_ids_nl": ["magere-citroenyoghurt"], + "is_before": "fr:yaourts-fruit-passion-alleges", + "preceding_lines": [""], + "src_position": 51, + "tags_fr": ["yaourts au citron allégés"], + "prop_description_en": " for light yogurts with lemon", + "id": 
"fr:yaourts-citron-alleges", + "tags_nl": ["magere citroenyoghurt"], + "parents": ["fr:yaourts-citron", "fr:yaourts-alleges"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["yaourts-myrtille"], + "main_language": "fr", + "tags_ids_nl": ["bosbessenyoghurt"], + "is_before": "fr:yaourts-citron-alleges", + "preceding_lines": [], + "src_position": 57, + "prop_flavour_en": " blueberry", + "tags_fr": ["yaourts à la myrtille"], + "id": "fr:yaourts-myrtille", + "prop_flavour_fr": " myrtille", + "tags_nl": ["bosbessenyoghurt"], + "parents": ["fr:yaourt"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "prop_vegan_en": "no", + "tags_en": ["meat"], + "is_before": "fr:yaourts-myrtille", + "preceding_lines": [], + "src_position": 63, + "id": "en:meat", + "tags_ids_en": ["meat"], + "prop_carbon_footprint_fr_foodges_value_fr": "10", + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "tags_en": ["beef"], + "is_before": "en:meat", + "preceding_lines": [], + "src_position": 67, + "id": "en:beef", + "tags_ids_en": ["beef"], + "parents": ["en:meat"], + "prop_carbon_footprint_fr_foodges_value_fr": "15", + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "tags_en": ["roast-beef"], + "is_before": "en:beef", + "preceding_lines": [], + "src_position": 71, + "id": "en:roast-beef", + "tags_ids_en": ["roast-beef"], + "parents": ["en:beef"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "prop_vegan_en": "yes", + "tags_en": ["fake-meat"], + "is_before": "en:roast-beef", + "preceding_lines": [ + "# undef will stop parents from transmitting a value" + ], + "src_position": 74, + "id": "en:fake-meat", + "tags_ids_en": ["fake-meat"], + "parents": ["en:meat"], + "prop_carbon_footprint_fr_foodges_value_fr": "undef", + "labels": 
["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "tags_en": ["fake-stuff"], + "preceding_lines": [], + "is_before": "en:fake-meat", + "src_position": 80, + "id": "en:fake-stuff", + "tags_ids_en": ["fake-stuff"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "tags_en": ["fake-duck-meat"], + "is_before": "en:fake-stuff", + "preceding_lines": [], + "src_position": 82, + "id": "en:fake-duck-meat", + "tags_ids_en": ["fake-duck-meat"], + "parents": ["en:fake-stuff", "en:fake-meat"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "prop_vegan_en": "yes", + "main_language": "en", + "tags_en": ["vegetable"], + "is_before": "en:fake-duck-meat", + "preceding_lines": [], + "src_position": 86, + "id": "en:vegetable", + "tags_ids_en": ["vegetable"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "prop_vegan_en": "maybe", + "tags_ids_xx": [ + "something-that-means-soup-in-every-language", + "something-else-that-means-soup-in-every-language" + ], + "tags_en": ["soup"], + "is_before": "en:vegetable", + "preceding_lines": [ + "# the soup yogourt synonym is used to test suggestions matching xx: synonyms" + ], + "src_position": 90, + "id": "en:soup", + "tags_xx": [ + "something that means soup in every language", + "something else that means soup in every language" + ], + "tags_ids_en": ["soup"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "prop_vegan_en": "yes", + "tags_en": ["vegan-soup"], + "is_before": "en:soup", + "preceding_lines": [], + "src_position": 94, + "id": "en:vegan-soup", + "tags_ids_en": ["vegan-soup"], + "parents": ["en:soup"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "prop_vegan_en": "no", + "tags_en": ["fish-soup"], + "is_before": "en:vegan-soup", + 
"preceding_lines": [], + "src_position": 98, + "id": "en:fish-soup", + "tags_ids_en": ["fish-soup"], + "parents": ["en:soup"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "de", + "preceding_lines": [], + "is_before": "en:fish-soup", + "tags_ids_de": ["spätzle"], + "src_position": 102, + "id": "de:spätzle", + "tags_de": ["Spätzle"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "tags_en": ["Kale"], + "is_before": "de:spätzle", + "tags_ids_de": ["grünkohl"], + "preceding_lines": [], + "src_position": 104, + "id": "en:kale", + "tags_de": ["Grünkohl"], + "tags_ids_en": ["kale"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["kefir-2-5"], + "preceding_lines": [], + "src_position": 107, + "tags_fr": ["Kéfir 2‚5 %"], + "tags_ids_ru": ["кефир-2.5"], + "tags_ids_en": ["kefir-2.5"], + "main_language": "en", + "tags_ru": ["Кефир 2.5 %", "Кефир 2.5%"], + "tags_en": ["Kefir 2.5 %"], + "is_before": "en:kale", + "tags_ids_de": ["kefir-2.5"], + "id": "en:kefir-2.5", + "tags_de": ["Kefir 2.5 %"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["french-entry"], + "main_language": "fr", + "is_before": "en:kefir-2.5", + "tags_ids_de": ["special-value-for-german"], + "preceding_lines": [], + "src_position": 112, + "tags_fr": ["French entry"], + "id": "fr:french-entry", + "tags_de": ["Special value for German"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "tags_ids_fr": ["french-entry-with-default-value"], + "main_language": "fr", + "tags_ids_xx": ["french-entry-with-default-value"], + "is_before": "fr:french-entry", + "tags_ids_de": ["special-value-for-german-2"], + "preceding_lines": [], + "src_position": 115, + "tags_fr": ["French entry with default value"], + "id": "fr:french-entry-with-default-value", + "tags_xx": ["French entry with 
default value"], + "tags_de": ["Special value for German 2"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "xx", + "tags_ids_xx": ["language-less-entry"], + "is_before": "fr:french-entry-with-default-value", + "preceding_lines": [], + "tags_ids_de": ["special-value-for-german-3"], + "src_position": 119, + "id": "xx:language-less-entry", + "tags_xx": ["Language-less entry"], + "tags_de": ["Special value for German 3"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "sv", + "tags_sv": ["Ä-märket"], + "tags_ids_xx": ["ä-märket"], + "is_before": "xx:language-less-entry", + "preceding_lines": [ + "# xx: entry with accents, need to match unaccented version" + ], + "src_position": 123, + "tags_ids_sv": ["ä-märket"], + "id": "sv:ä-märket", + "tags_xx": ["Ä-märket"], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "main_language": "en", + "tags_en": [ + "Entry with (parentheses) and some *!#{}@$ characters", + "synonym with *%@$(]% chars" + ], + "is_before": "sv:ä-märket", + "preceding_lines": [], + "src_position": 126, + "id": "en:entry-with-parentheses-and-some-characters", + "tags_ids_en": [ + "entry-with-parentheses-and-some-characters", + "synonym-with-chars" + ], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ENTRY"] + }, + { + "is_before": "en:entry-with-parentheses-and-some-characters", + "preceding_lines": [], + "src_position": 126, + "id": "__footer__", + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "TEXT"] + }, + { + "warnings": [ + "parent not found for child fr:yaourts-myrtille with parent yaourt" + ], + "branch_name": "testbranch", + "created_at": "2023-11-16T00:36:22.663000000+00:00", + "id": "p_test_testbranch", + "taxonomy_name": "test", + "errors": [], + "labels": ["p_test_testbranch", "b_testbranch", "t_test", "ERRORS"] + }, + { + "branch_name": "testbranch", + "description": "just a test branch", 
+ "created_at": "2023-11-16T00:36:22.686000000+00:00", + "id": "p_test_testbranch", + "taxonomy_name": "test", + "status": "OPEN", + "labels": ["PROJECT"] + } + ], + "relations": [ + { "is_child_of": ["en:banana-yogurts", "en:yogurts"] }, + { "is_child_of": ["en:passion-fruit-yogurts", "en:yogurts"] }, + { "is_child_of": ["fr:yaourts-alleges", "en:yogurts"] }, + { "is_child_of": ["en:lemon-yogurts", "en:yogurts"] }, + { + "is_child_of": [ + "fr:yaourts-fruit-passion-alleges", + "en:passion-fruit-yogurts" + ] + }, + { + "is_child_of": ["fr:yaourts-fruit-passion-alleges", "fr:yaourts-alleges"] + }, + { "is_child_of": ["fr:yaourts-citron-alleges", "en:lemon-yogurts"] }, + { "is_child_of": ["fr:yaourts-citron-alleges", "fr:yaourts-alleges"] }, + { "is_child_of": ["en:beef", "en:meat"] }, + { "is_child_of": ["en:roast-beef", "en:beef"] }, + { "is_child_of": ["en:fake-meat", "en:meat"] }, + { "is_child_of": ["en:fake-duck-meat", "en:fake-stuff"] }, + { "is_child_of": ["en:fake-duck-meat", "en:fake-meat"] }, + { "is_child_of": ["en:vegan-soup", "en:soup"] }, + { "is_child_of": ["en:fish-soup", "en:soup"] }, + { "is_before": ["__header__", "stopwords:0"] }, + { "is_before": ["stopwords:0", "synonyms:0"] }, + { "is_before": ["synonyms:0", "synonyms:1"] }, + { "is_before": ["synonyms:1", "en:yogurts"] }, + { "is_before": ["en:yogurts", "en:banana-yogurts"] }, + { "is_before": ["en:banana-yogurts", "en:passion-fruit-yogurts"] }, + { "is_before": ["en:passion-fruit-yogurts", "fr:yaourts-alleges"] }, + { "is_before": ["fr:yaourts-alleges", "en:lemon-yogurts"] }, + { "is_before": ["en:lemon-yogurts", "fr:yaourts-fruit-passion-alleges"] }, + { + "is_before": [ + "fr:yaourts-fruit-passion-alleges", + "fr:yaourts-citron-alleges" + ] + }, + { "is_before": ["fr:yaourts-citron-alleges", "fr:yaourts-myrtille"] }, + { "is_before": ["fr:yaourts-myrtille", "en:meat"] }, + { "is_before": ["en:meat", "en:beef"] }, + { "is_before": ["en:beef", "en:roast-beef"] }, + { "is_before": 
["en:roast-beef", "en:fake-meat"] }, + { "is_before": ["en:fake-meat", "en:fake-stuff"] }, + { "is_before": ["en:fake-stuff", "en:fake-duck-meat"] }, + { "is_before": ["en:fake-duck-meat", "en:vegetable"] }, + { "is_before": ["en:vegetable", "en:soup"] }, + { "is_before": ["en:soup", "en:vegan-soup"] }, + { "is_before": ["en:vegan-soup", "en:fish-soup"] }, + { "is_before": ["en:fish-soup", "de:spätzle"] }, + { "is_before": ["de:spätzle", "en:kale"] }, + { "is_before": ["en:kale", "en:kefir-2.5"] }, + { "is_before": ["en:kefir-2.5", "fr:french-entry"] }, + { "is_before": ["fr:french-entry", "fr:french-entry-with-default-value"] }, + { + "is_before": [ + "fr:french-entry-with-default-value", + "xx:language-less-entry" + ] + }, + { "is_before": ["xx:language-less-entry", "sv:ä-märket"] }, + { + "is_before": [ + "sv:ä-märket", + "en:entry-with-parentheses-and-some-characters" + ] + }, + { + "is_before": [ + "en:entry-with-parentheses-and-some-characters", + "__footer__" + ] + } + ] +} diff --git a/backend/sample/load.py b/backend/sample/load.py index fe2c3ec6..3f4063c1 100644 --- a/backend/sample/load.py +++ b/backend/sample/load.py @@ -3,29 +3,40 @@ """ import argparse import json +import os import sys +from datetime import datetime from neo4j import GraphDatabase -DEFAULT_URL = "bolt://localhost:7687" +DEFAULT_URL = os.environ.get("NEO4J_URI", "bolt://localhost:7687") def get_session(uri=DEFAULT_URL): return GraphDatabase.driver(uri).session() + def clean_db(session): session.run("""match (p) detach delete(p)""") + def add_node(node, session): labels = node.pop("labels", []) - query = f"CREATE (n:{','.join(labels)} $data)" + if "created_at" in node: + # Truncate the microseconds to six digits to match the format string + stringified_datetime = node["created_at"][:26] + node["created_at"][29:] + node["created_at"] = datetime.strptime(stringified_datetime, "%Y-%m-%dT%H:%M:%S.%f%z") + query = f"CREATE (n:{':'.join(labels)} $data)" session.run(query, data=node) + def 
add_link(rel, session): if len(rel) != 1 and len(next(iter(rel.values()))) != 2: - raise ValueError(f""" + raise ValueError( + f""" Expecting relations to by dict like {{"rel_name": ["node1", "node2"]}}, got {rel} - """.trim()) + """.trim() + ) for rel_name, (from_id, to_id) in rel.items(): query = f""" MATCH(source) WHERE source.id = $from_id @@ -46,15 +57,14 @@ def load_jsonl(file_path, session): def get_options(args=None): - parser = argparse.ArgumentParser(description='Import json file to Neo4J database') - parser.add_argument('--url', default=DEFAULT_URL, help='Neo4J database bolt URL') - parser.add_argument('file', help='Json file to import') + parser = argparse.ArgumentParser(description="Import json file to Neo4J database") + parser.add_argument("--url", default=DEFAULT_URL, help="Neo4J database bolt URL") + parser.add_argument("file", help="Json file to import") parser.add_argument( - '--reset', default=False, action="store_true", - help='Clean all database before importing' + "--reset", default=False, action="store_true", help="Clean all database before importing" ) - parser.add_argument('--yes', default=False, action="store_true", - help='Assume yes to all questions' + parser.add_argument( + "--yes", default=False, action="store_true", help="Assume yes to all questions" ) return parser.parse_args(args) @@ -66,7 +76,7 @@ def confirm_clean_db(session): response = input(f"You are about to remove {num_nodes} nodes, are you sure ? 
[y/N]: ") return response.lower() in ("y", "yes") - + if __name__ == "__main__": options = get_options() session = get_session(options.url) @@ -76,4 +86,4 @@ def confirm_clean_db(session): sys.exit(1) else: clean_db(session) - load_jsonl(options.file, session) \ No newline at end of file + load_jsonl(options.file, session) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 7addc9e4..5519b224 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -16,17 +16,17 @@ def client(): @pytest.fixture -def neo4j(): +def neo4j(scope="session"): """waiting for neo4j to be ready""" uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687") - driver = GraphDatabase.driver(uri) - session = driver.session() - connected = False - while not connected: - try: - session.run("MATCH () return 1 limit 1") - except ServiceUnavailable: - time.sleep(1) - else: - connected = True - return driver + with GraphDatabase.driver(uri) as driver: + with driver.session() as session: + connected = False + while not connected: + try: + session.run("MATCH () return 1 limit 1") + except ServiceUnavailable: + time.sleep(1) + else: + connected = True + yield driver diff --git a/backend/tests/data/test.txt b/backend/tests/data/test.txt new file mode 100644 index 00000000..30f45d5b --- /dev/null +++ b/backend/tests/data/test.txt @@ -0,0 +1,126 @@ +# test taxonomy + +stopwords:fr: aux,au,de,le,du,la,a,et + +synonyms:en:passion fruit, passionfruit + +synonyms:fr:fruit de la passion, fruits de la passion, maracuja, passion + +en:yogurts, yoghurts +fr:yaourts, yoghourts, yogourts +nl:yoghurts +description:en: a yogurts of whatever type +description:fr: un yaourt de n'importe quel type +color:en: white +flavour:en: undef + +