Skip to content

Commit

Permalink
Correcting tests and adjusting code for toml-based shared DTO object
Browse files Browse the repository at this point in the history
  • Loading branch information
ArturOle committed Nov 13, 2024
1 parent 3c77d22 commit b61a921
Show file tree
Hide file tree
Showing 15 changed files with 129 additions and 130 deletions.
File renamed without changes.
5 changes: 0 additions & 5 deletions config.ini

This file was deleted.

15 changes: 15 additions & 0 deletions config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

[settings]
db_engine = "neo4j"

[OCR]
TESSERACT_PATH = "/usr/bin/tesseract"
POPPLER_PATH = "/usr/bin/pdftotext"

[neo4j]
NEO4J_URI = "neo4j://database:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "StrongPsPsP5"

[qdrant]
in_memory = true
6 changes: 3 additions & 3 deletions docker/build_dev/dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:22.04
FROM ubuntu:24.04

ENV DEBIAN_FRONTEND=noninteractive
ENV POETRY_HOME="/root/.poetry"
Expand All @@ -7,9 +7,9 @@ ENV PATH="$POETRY_HOME/bin:$PATH"
CMD ["bash"]

RUN apt-get update
RUN apt-get install -y python3 python3-pip curl
RUN apt-get install -y python3 pipx curl
RUN curl -sSL https://install.python-poetry.org | python3 -

WORKDIR /ragger
COPY . /ragger
RUN make install
RUN poetry install
7 changes: 3 additions & 4 deletions docker/integration_tests/dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:22.04
FROM ubuntu:24.04

ENV DEBIAN_FRONTEND=noninteractive
ENV POETRY_HOME="/root/.poetry"
Expand All @@ -7,7 +7,7 @@ ENV PATH="$POETRY_HOME/bin:$PATH"
CMD ["bash"]

RUN apt-get update --fix-missing
RUN apt-get install -y python3 python3-pip git ca-certificates lsb-release ubuntu-keyring software-properties-common curl
RUN apt-get install -y python3 pipx git ca-certificates lsb-release ubuntu-keyring software-properties-common curl
RUN curl -sSL https://install.python-poetry.org | python3 -

RUN update-ca-certificates --fresh
Expand All @@ -17,9 +17,8 @@ RUN apt install -y libtesseract-dev
RUN apt install -y poppler-utils

WORKDIR /ragger
RUN pip install --upgrade pip
COPY . /ragger
RUN touch logs.log
RUN make install
RUN poetry install

# `RUN export ...` only sets the variable for the shell of that single build
# step; ENV persists it for later build steps and for the running container.
# /usr/bin/git is where the apt `git` package installed above places the binary.
ENV GIT_PYTHON_GIT_EXECUTABLE=/usr/bin/git
7 changes: 3 additions & 4 deletions docker/unit_tests/dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:22.04
FROM ubuntu:24.04

ENV DEBIAN_FRONTEND=noninteractive
ENV POETRY_HOME="/root/.poetry"
Expand All @@ -7,17 +7,16 @@ ENV PATH="$POETRY_HOME/bin:$PATH"
CMD ["bash"]

RUN apt-get update
RUN apt-get install -y python3 python3-pip git curl
RUN apt-get install -y python3 pipx git curl
RUN curl -sSL https://install.python-poetry.org | python3 -

RUN apt install -y tesseract-ocr
RUN apt install -y libtesseract-dev
RUN apt install -y poppler-utils

WORKDIR /ragger
RUN pip install --upgrade pip
COPY . /ragger
RUN touch logs.log
RUN make install
RUN poetry install

# `RUN export ...` only sets the variable for the shell of that single build
# step; ENV persists it for later build steps and for the running container.
# /usr/bin/git is where the apt `git` package installed above places the binary.
ENV GIT_PYTHON_GIT_EXECUTABLE=/usr/bin/git
10 changes: 5 additions & 5 deletions src/context_search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
from .data_manager import DataManager
from .reader import ReadManager
from .preprocessor import Preprocessor
from .communicator import Communicator
from .utils import setup_logger, config_variables
from .communicator import CommAdapterNeo
from .utils import setup_logger, EnvInterface

__all__ = [
'ContextSearch',
'DataManager',
'ReadManager',
'Preprocessor',
'Communicator',
'CommAdapterNeo',
'setup_logger',
'config_variables'
]
'EnvInterface'
]
4 changes: 2 additions & 2 deletions src/context_search/communicator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

from .communicator import Communicator
from .communicator import CommAdapterNeo

__all__ = ["Communicator"]
__all__ = ["CommAdapterNeo"]
38 changes: 19 additions & 19 deletions src/context_search/communicator/communicator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from neo4j import GraphDatabase
from qdrant_client import QdrantClient
from abc import ABC, abstractmethod

from ..utils import setup_logger
from ..data_classes import LiteratureGraph
Expand All @@ -9,13 +9,6 @@
logger = setup_logger("Communicator Logger", "logs.log")


class DatabaseNotSupportedError(BaseException):
def __init__(self, db) -> None:
super().__init__(
""
)


class AbstractCommAdapter(ABC):

@abstractmethod
Expand All @@ -28,6 +21,15 @@ def connection():
pass


class DatabaseNotSupported(BaseException, AbstractCommAdapter):
    """Exception type standing in for an adapter of an unsupported database.

    ``DatabaseManager`` uses this class as the fallback value when the
    requested adapter name is not found among the supported databases.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Initialise the exception with a fixed "not supported" message.

        Any positional or keyword arguments are accepted and ignored.
        """
        # Zero-argument super() is required here: ``super(BaseException)``
        # creates an *unbound* super object, and calling ``__init__`` on it
        # with a string raises TypeError instead of setting the message.
        super().__init__(
            "The requested database is not supported.\n"
            "Supported:\n"
            " {Neo4j, Qdrant}"
        )


class CommAdapterNeo(AbstractCommAdapter):
"""Communicator class for interacting with the Neo4j database.
Expand Down Expand Up @@ -82,7 +84,7 @@ def add_literature_subgraph(
session,
literature_graph: LiteratureGraph
):
session.write_transaction(
session.execute_write(
self._add_literature_subgraph,
literature_graph
)
Expand All @@ -92,7 +94,7 @@ def create_vector_indexes(self, session):
"""Creates vector indexes for chunks and tags.
This function is separated from the add_literature_subgraph
because the indexes cannot be created in the same transaction"""
session.write_transaction(self._index_ebeddables)
session.execute_write(self._index_ebeddables)

def _add_literature_subgraph(self, tx, literature_graph: LiteratureGraph):
"""Builds the nodes and relationships based on the given
Expand All @@ -117,37 +119,37 @@ def _index_ebeddables(self, tx):

@connection
def get_literature(self, session, filename):
return session.read_transaction(QueryBuilder._get_literature, filename)
return session.execute_read(QueryBuilder._get_literature, filename)

@connection
def get_literature_chunks(self, session, filename):
return session.read_transaction(
return session.execute_read(
QueryBuilder._get_literature_chunks,
filename
)

@connection
def get_literature_tags(self, session, filename):
return session.read_transaction(
return session.execute_read(
QueryBuilder._get_literature_tags,
filename
)

@connection
def search_n_records(self, session, query, n):
return session.read_transaction(
return session.execute_read(
QueryBuilder._search_n_records,
query,
n
)

@connection
def get_all_literatures(self, session):
return session.read_transaction(QueryBuilder._get_all_literatures)
return session.execute_read(QueryBuilder._get_all_literatures)

@connection
def delete_literature(self, session, filename):
session.write_transaction(QueryBuilder._delete_literature, filename)
session.execute_write(QueryBuilder._delete_literature, filename)

def __del__(self):
if self._driver is not None:
Expand Down Expand Up @@ -210,7 +212,5 @@ class DatabaseManager:
def __init__(self, adapter: str):
self.database_adapter = self.supported_db.get(
adapter,
DatabaseNotSupportedError
DatabaseNotSupported
)

def
24 changes: 12 additions & 12 deletions src/context_search/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from .reader import ReadManager
from .preprocessor import Preprocessor
from .communicator import Communicator
from .utils import setup_logger, config_variables

from .communicator import CommAdapterNeo
from .utils import setup_logger, EnvInterface

logger = setup_logger('Data Manager Logger', 'logs.log')

Expand All @@ -15,23 +14,24 @@ class DataManager:
_communicator = None

def __init__(self):
EnvInterface().set_env_variables_from_config()
self.read_manager = ReadManager()
self.preprocessor = Preprocessor()

@property
def communicator(self):
    """Return the Neo4j communicator, creating it on first access.

    The connection settings are read with ``EnvInterface.get_neo4j_vars()``
    and the resulting ``CommAdapterNeo`` is cached in ``self._communicator``
    so later accesses reuse the same object.
    """
    if self._communicator is None:
        neo4j_variables = EnvInterface.get_neo4j_vars()
        self._communicator = CommAdapterNeo(
            uri=neo4j_variables["NEO4J_URI"],
            user=neo4j_variables["NEO4J_USER"],
            password=neo4j_variables["NEO4J_PASSWORD"]
        )
        # The password is masked on purpose: credentials must not end up
        # in log files, even at debug level.
        logger.debug(f"""
            Communicator created with:
            uri: {neo4j_variables["NEO4J_URI"]}
            user: {neo4j_variables["NEO4J_USER"]}
            password: ***
            """)
    return self._communicator

Expand Down
19 changes: 7 additions & 12 deletions src/context_search/reader/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import List

from ..data_classes import LiteratureDTO
from ..utils import setup_logger, config_variables
from ..utils import setup_logger, EnvInterface

current_directory = os.path.dirname(__file__)

Expand Down Expand Up @@ -106,18 +106,13 @@ def _setup_paths_from_config(self):
directory as the script that is being run with paths to tesseract
and poppler bin folder (NOT TO EXECUTABLES, BUT FOLDERS).
"""
if not os.getenv("POPPLER_PATH") or not os.getenv("TESSERACT_PATH"):
self.tesseract_path, self.poppler_path = config_variables.get_OCR_variables()
self.tesseract_path = os.getenv("TESSERACT_PATH")
self.poppler_path = os.getenv("POPPLER_PATH")

if os.getenv("POPPLER_PATH"):
self.poppler_path = os.getenv("POPPLER_PATH")
else:
os.environ["POPPLER_PATH"] = self.poppler_path

if os.getenv("TESSERACT_PATH"):
self.tesseract_path = os.getenv("TESSERACT_PATH")
else:
os.environ["TESSERACT_PATH"] = self.tesseract_path
if not self.tesseract_path or not self.poppler_path:
ocr_vars = EnvInterface.get_OCR_vars()
self.tesseract_path = ocr_vars.get("TESSERACT_PATH")
self.poppler_path = ocr_vars.get("POPPLER_PATH")

if os.name == "nt":
pytesseract.pytesseract.tesseract_cmd = os.path.join(
Expand Down
8 changes: 2 additions & 6 deletions src/context_search/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from .config_variables import (
get_OCR_variables,
get_neo4j_variables,
set_env_variables_from_config
EnvInterface
)
from .logger_setup import setup_logger

__all__ = [
"get_OCR_variables",
"get_neo4j_variables",
"set_env_variables_from_config",
"EnvInterface",
"setup_logger"
]
Loading

0 comments on commit b61a921

Please sign in to comment.