Skip to content

Commit

Permalink
Poetry build (#18)
Browse files Browse the repository at this point in the history
* Introduction of poetry to build process

* Correction of dockerfiles

* Deletion of requirements directory
  • Loading branch information
ArturOle authored Oct 22, 2024
1 parent 8b517a1 commit 0669432
Show file tree
Hide file tree
Showing 10 changed files with 3,139 additions and 45 deletions.
9 changes: 5 additions & 4 deletions docker/build_dev/dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# Development build image based on Ubuntu 22.04.
# NOTE(review): this is a rendered diff hunk without +/- markers, so pre- and
# post-commit instructions are interleaved below (both the pip/requirements
# steps and the Poetry/make steps appear) — confirm against the real file.
FROM ubuntu:22.04

# Keeps apt from prompting during the build.
# NOTE(review): set via ENV, so it also stays in the running container's
# environment, not only at build time.
ENV DEBIAN_FRONTEND=noninteractive
# POETRY_HOME and the PATH entry belong together: PATH gains $POETRY_HOME/bin,
# presumably where the installer below places the `poetry` executable —
# confirm against the Poetry installer documentation.
ENV POETRY_HOME="/root/.poetry"
ENV PATH="$POETRY_HOME/bin:$PATH"

# Default command is bash.
CMD ["bash"]

# NOTE(review): `apt-get update` is its own layer, separate from the installs
# that depend on it, so a cached update layer can be reused with newer installs.
RUN apt-get update
RUN apt-get install -y python3 python3-pip
# Installs torch from the PyTorch CPU wheel index.
RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
RUN apt-get install -y python3 python3-pip curl
# Downloads the Poetry installer script and pipes it into python3.
# NOTE(review): no Poetry version is pinned here.
RUN curl -sSL https://install.python-poetry.org | python3 -

COPY ./requirements /ragger/requirements
WORKDIR /ragger
RUN pip install -r requirements/test.txt
# Copies the whole build context into /ragger, then runs the makefile's
# `install` target (which is `poetry install` in the makefile of this commit).
COPY . /ragger
RUN make install
2 changes: 1 addition & 1 deletion docker/integration_tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ services:
environment:
- SSL_CERT_DIR=/etc/ssl/certs
command: >
bash -c "python3 -m pytest ./test/integration_tests || exit 1"
bash -c "poetry run pytest ./test/integration_tests || exit 1"
&& docker-compose down
11 changes: 6 additions & 5 deletions docker/integration_tests/dockerfile
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
# Integration-test image based on Ubuntu 22.04.
# NOTE(review): this is a rendered diff hunk without +/- markers, so pre- and
# post-commit instructions are interleaved below — confirm against the real
# file before relying on the exact instruction sequence.
FROM ubuntu:22.04

# Keeps apt from prompting during the build.
# NOTE(review): set via ENV, so it also stays in the running container's
# environment, not only at build time.
ENV DEBIAN_FRONTEND=noninteractive
# PATH gains $POETRY_HOME/bin, presumably where the installer below places the
# `poetry` executable — confirm against the Poetry installer documentation.
ENV POETRY_HOME="/root/.poetry"
ENV PATH="$POETRY_HOME/bin:$PATH"

# Default command is bash; the compose file in this commit overrides it with a
# pytest invocation.
CMD ["bash"]

# NOTE(review): `apt-get update` is its own layer, separate from the installs
# that depend on it.
RUN apt-get update --fix-missing
RUN apt-get install -y python3 python3-pip git ca-certificates lsb-release ubuntu-keyring software-properties-common
RUN apt-get install -y python3 python3-pip git ca-certificates lsb-release ubuntu-keyring software-properties-common curl
# Downloads the Poetry installer script and pipes it into python3.
# NOTE(review): no Poetry version is pinned here.
RUN curl -sSL https://install.python-poetry.org | python3 -

RUN update-ca-certificates --fresh
# NOTE(review): `export` inside RUN only affects the shell of this one build
# step; it does not carry into later layers or the running container. The
# integration-tests compose file sets SSL_CERT_DIR under `environment`.
RUN export SSL_CERT_DIR=/etc/ssl/certs
# OCR and PDF tooling from apt.
# NOTE(review): these three lines use `apt` while the lines above use `apt-get`.
RUN apt install -y tesseract-ocr
RUN apt install -y libtesseract-dev
RUN apt install -y poppler-utils

COPY ./requirements /ragger/requirements
WORKDIR /ragger
RUN pip install --upgrade pip
RUN pip install -r requirements/test.txt
# Downloads the spaCy model en_core_web_sm; pyproject.toml in this commit also
# lists en_core_web_sm as a URL dependency.
RUN python3 -m spacy download en_core_web_sm
COPY . /ragger
# Creates an empty logs.log in the working directory (/ragger) if none exists.
RUN touch logs.log
RUN pip install .
# Runs the makefile's `install` target (`poetry install` in this commit).
RUN make install

# NOTE(review): as with SSL_CERT_DIR above, this export ends with the RUN step;
# GIT_PYTHON_GIT_EXECUTABLE is not set in the resulting image.
RUN export GIT_PYTHON_GIT_EXECUTABLE=$(which git)
2 changes: 1 addition & 1 deletion docker/unit_tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ services:
dockerfile: docker/unit_tests/dockerfile
container_name: ragger
command: >
bash -c "python3 -m pytest ./test/unit_tests || exit 1"
bash -c "poetry run pytest ./test/unit_tests || exit 1"
&& docker-compose down
12 changes: 6 additions & 6 deletions docker/unit_tests/dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
# Unit-test image based on Ubuntu 22.04.
# NOTE(review): this is a rendered diff hunk without +/- markers, so pre- and
# post-commit instructions are interleaved below — confirm against the real
# file before relying on the exact instruction sequence.
FROM ubuntu:22.04

# Keeps apt from prompting during the build.
# NOTE(review): set via ENV, so it also stays in the running container's
# environment, not only at build time.
ENV DEBIAN_FRONTEND=noninteractive
# PATH gains $POETRY_HOME/bin, presumably where the installer below places the
# `poetry` executable — confirm against the Poetry installer documentation.
ENV POETRY_HOME="/root/.poetry"
ENV PATH="$POETRY_HOME/bin:$PATH"

# Default command is bash; the compose file in this commit overrides it with a
# pytest invocation.
CMD ["bash"]

# NOTE(review): `apt-get update` is its own layer, separate from the installs
# that depend on it.
RUN apt-get update
RUN apt-get install -y python3 python3-pip git
# Installs torch from the PyTorch CPU wheel index.
RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
RUN apt-get install -y python3 python3-pip git curl
# Downloads the Poetry installer script and pipes it into python3.
# NOTE(review): no Poetry version is pinned here.
RUN curl -sSL https://install.python-poetry.org | python3 -

# OCR and PDF tooling from apt.
# NOTE(review): these three lines use `apt` while the lines above use `apt-get`.
RUN apt install -y tesseract-ocr
RUN apt install -y libtesseract-dev
RUN apt install -y poppler-utils

COPY ./requirements /ragger/requirements
WORKDIR /ragger
RUN pip install --upgrade pip
RUN pip install -r requirements/test.txt
# Downloads the spaCy model en_core_web_sm; pyproject.toml in this commit also
# lists en_core_web_sm as a URL dependency.
RUN python3 -m spacy download en_core_web_sm
COPY . /ragger
# Creates an empty logs.log in the working directory (/ragger) if none exists.
RUN touch logs.log
RUN pip install .
# Runs the makefile's `install` target (`poetry install` in this commit).
RUN make install

# NOTE(review): `export` inside RUN only affects the shell of this one build
# step; GIT_PYTHON_GIT_EXECUTABLE is not set in the resulting image.
RUN export GIT_PYTHON_GIT_EXECUTABLE=$(which git)
40 changes: 40 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Use Poetry for dependency management and running tasks.
#
# Recipe lines below are indented with a literal TAB, as make requires.

# None of these targets produces a file named after the target, so declare them
# phony. Without this, an existing directory such as requirements/ or build/
# makes the matching target look up to date and make skips its recipe.
.PHONY: install update requirements lint run clean build help

# Target to install dependencies
install:
	poetry install

# Target to update dependencies
update:
	poetry update

# Target to export requirements.txt for compatibility.
# The output directory is created first so the export does not depend on
# requirements/ already being present in the checkout.
requirements:
	mkdir -p requirements
	poetry export -f requirements.txt --output requirements/base.txt --without-hashes

# Target to run linting
lint:
	poetry run flake8 .

# Target to run the application.
# The `run:` header is required: without it the command below is parsed as a
# second recipe line of `lint`, and `make run` has no rule at all.
run:
	poetry run python src/main.py

# Clean up cache files, etc.
clean:
	rm -rf .pytest_cache .mypy_cache
	find . -name "*.pyc" -delete

# Target to build the package
build:
	poetry build

# Help target to display available commands
help:
	@echo "Available commands:"
	@echo " install Install project dependencies"
	@echo " update Update project dependencies"
	@echo " requirements Export dependencies to requirements.txt"
	@echo " lint Run linting"
	@echo " run Run the application"
	@echo " clean Remove cache files"
	@echo " build Build the package"
3,050 changes: 3,050 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

41 changes: 30 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,21 +1,40 @@
# Project metadata, dependencies and build backend.
# NOTE(review): this is a rendered diff hunk without +/- markers. The
# setuptools-era lines ([project], [tool.setuptools.*], setuptools
# build-system) and the Poetry-era lines ([tool.poetry*], poetry-core
# build-system) both appear below, so the text as shown has duplicate keys and
# tables and is not loadable TOML — confirm against the real file.
[project]
[tool.poetry]
name = "ContextSearch"
version = "0.5.0"
description = "User friendly system for semantic search."
authors = [
{ name = "ArturOle", email = "[email protected]" }
"ArturOle"
]
license = { text = ["GPL-3.0"], file = "LICENSE" }
license = "GPL-3.0"
readme = "README.md"
keywords = ["semantic search", "ocr", "rag", "document-embedding", "contextual-search", "retrieval-augmented-generation", "pdf"]
dynamic = ["dependencies"]
# The importable package is context_search, located under src/.
packages = [{include = "context_search", from="src"}]

[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
# Runtime dependencies. torch is tied to the "torch" source declared further
# down; en_core_web_sm is installed from a release tarball URL.
[tool.poetry.dependencies]
python = "^3.10"
neo4j = "^5.25.0"
pdf2image = "^1.17.0"
pydantic = "^2.9.2"
PyMuPDF = "^1.24.10"
pytesseract = "^0.3.13"
pytextrank = "^3.3.0"
spacy = "^3.8.2"
tqdm = "^4.66.5"
transformers = "^4.45.1"
numpy = "^2.0.2"
torch = { version = "^2.3.1", source = "torch"}
en_core_web_sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz"}

# Test tooling.
# NOTE(review): Poetry's documentation describes `dev-dependencies` as the
# legacy spelling of `[tool.poetry.group.dev.dependencies]` — confirm which
# form the targeted Poetry version expects.
[tool.poetry.dev-dependencies]
pytest = "^8.3.2"
pytest-mock = "^3.14.0"
pytest-cov = "^5.0.0"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements\base.txt"]}
# Extra package index holding the CPU-only PyTorch wheels.
# NOTE(review): only torch names this source; Poetry also offers an `explicit`
# priority for sources that should serve only packages that name them —
# confirm `supplemental` is the intended choice.
[[tool.poetry.source]]
name = "torch"
url = "https://download.pytorch.org/whl/cpu"
priority = "supplemental"

[tool.setuptools.packages.find]
where = ["src"]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
13 changes: 0 additions & 13 deletions requirements/base.txt

This file was deleted.

4 changes: 0 additions & 4 deletions requirements/test.txt

This file was deleted.

0 comments on commit 0669432

Please sign in to comment.