-
Notifications
You must be signed in to change notification settings - Fork 260
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin' into feat/make_modular
- Loading branch information
Showing
79 changed files
with
1,207 additions
and
390 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Workflow: builds the GPU Docker image and pushes it to Amazon ECR. | ||
name: Build docker GPU and push ECR | ||
|
||
# Triggers: pushes of tags matching "v*" and pushes to the main branch. | ||
on: | ||
push: | ||
tags: | ||
- "v*" | ||
branches: [main] | ||
|
||
# Workflow-level environment variables. | ||
# Of these, only ECR_REPOSITORY is referenced by the steps shown in this workflow; | ||
# AWS_REGION, ECS_CLUSTER, ECS_TASK_DEFINITION and CONTAINER_NAME are not read here. | ||
env: | ||
AWS_REGION: eu-west-1 | ||
ECR_REPOSITORY: quivrhq/megaparse-gpu | ||
ECS_CLUSTER: megaparse | ||
ECS_TASK_DEFINITION: .aws/task_definition.json | ||
CONTAINER_NAME: megaparse | ||
|
||
# Limit the workflow token to read-only access to repository contents. | ||
permissions: | ||
contents: read | ||
|
||
# Single job: runs on ubuntu-latest in the "production" environment and exposes | ||
# the pushed image reference (set by the build-image step) as the `imageoutput` output. | ||
jobs: | ||
deploy: | ||
name: Build docker-gpu | ||
runs-on: ubuntu-latest | ||
environment: production | ||
outputs: | ||
imageoutput: ${{ steps.build-image.outputs.imageoutput }} | ||
|
||
steps: | ||
# Fetch the repository so the Dockerfile and build context are available. | ||
- name: Checkout | ||
uses: actions/checkout@v3 | ||
|
||
# Authenticate to AWS with the access key pair stored in repository secrets. | ||
# NOTE(review): the region is hard-coded to us-east-1 instead of using env.AWS_REGION | ||
# (eu-west-1) - presumably because the public ECR login below is served from | ||
# us-east-1; confirm, and drop the unused AWS_REGION variable if so. | ||
- name: Configure AWS credentials | ||
uses: aws-actions/configure-aws-credentials@v4 | ||
with: | ||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
aws-region: us-east-1 | ||
|
||
# Log Docker in to the public ECR registry; the registry host is read later | ||
# through steps.login-ecr.outputs.registry. | ||
- name: Login to Amazon ECR | ||
id: login-ecr | ||
uses: aws-actions/amazon-ecr-login@v1 | ||
with: | ||
registry-type: public | ||
|
||
# Build the image from Dockerfile.gpu, push it tagged with the commit SHA, then | ||
# re-tag the same image as 'latest' and push that too. The SHA-tagged reference is | ||
# written to $GITHUB_OUTPUT as `imageoutput`. | ||
- name: Build, tag, and push image to Amazon ECR | ||
id: build-image | ||
env: | ||
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} | ||
IMAGE_TAG: ${{ github.sha }} | ||
run: | | ||
# Build a docker container and push it to ECR | ||
# (expansions are quoted so an unexpected space or glob character cannot split an argument) | ||
docker build -t "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" -f Dockerfile.gpu . | ||
docker push "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" | ||
# Tag the image as 'latest' and push | ||
docker tag "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" "$ECR_REGISTRY/$ECR_REPOSITORY:latest" | ||
docker push "$ECR_REGISTRY/$ECR_REPOSITORY:latest" | ||
echo "imageoutput=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Workflow: on pull requests targeting main, check that both Dockerfiles still build. | ||
# Nothing is pushed (push: false below). | ||
on: | ||
pull_request: | ||
branches: | ||
- main | ||
|
||
name: Test build docker | ||
jobs: | ||
build-docker: | ||
runs-on: ubuntu-latest | ||
# One job instance per Dockerfile listed in the matrix. | ||
strategy: | ||
matrix: | ||
dockerfile: [Dockerfile, Dockerfile.gpu] | ||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v3 | ||
|
||
# NOTE(review): QEMU is set up for all platforms, but the build step below sets no | ||
# `platforms` input - presumably only the runner's native platform is built; | ||
# confirm whether this step is needed. | ||
- name: Set up QEMU | ||
uses: docker/setup-qemu-action@v3 | ||
with: | ||
platforms: all | ||
|
||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v3 | ||
|
||
# Build the matrix-selected Dockerfile from the repository root, reading from and | ||
# writing to the GitHub Actions cache (type=gha). The image is tagged with the | ||
# Dockerfile's name and is not pushed. | ||
- name: Build Docker image with caching | ||
uses: docker/build-push-action@v4 | ||
with: | ||
context: . | ||
file: ${{ matrix.dockerfile }} | ||
push: false | ||
tags: quivrhq/megaparse:${{ matrix.dockerfile }} | ||
cache-from: type=gha | ||
cache-to: type=gha,mode=max |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
{ | ||
"libs/megaparse": "0.0.48", | ||
"libs/megaparse_sdk": "0.1.7" | ||
"libs/megaparse": "0.0.52", | ||
"libs/megaparse_sdk": "0.1.10" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# GPU image: CUDA 12.6.3 + cuDNN development base on Ubuntu 20.04 (per the image tag). | ||
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu20.04 | ||
|
||
# All following relative paths (COPY destinations, RUN commands) resolve under /app. | ||
WORKDIR /app | ||
|
||
# uv settings: presumably compile installed packages to bytecode and skip uv's cache | ||
# (going by the variable names - confirm against the uv documentation). | ||
ENV UV_COMPILE_BYTECODE=1 | ||
ENV UV_NO_CACHE=1 | ||
# Keep apt from prompting during the build. | ||
# NOTE(review): set via ENV, so it also persists into the running container's | ||
# environment; an ARG would limit it to build time - confirm nothing relies on it at runtime. | ||
ENV DEBIAN_FRONTEND=noninteractive | ||
|
||
# Install system dependencies in a single layer: | ||
#   1. software-properties-common provides add-apt-repository; | ||
#   2. the deadsnakes PPA is added before installing python3.11 / python3.11-dev | ||
#      (presumably because the Ubuntu 20.04 base does not package Python 3.11 - confirm); | ||
#   3. build tooling plus the document-handling packages are installed; | ||
#   4. the apt lists are removed in the same layer so they do not stay in the image. | ||
RUN apt-get update && apt-get install -y software-properties-common && \ | ||
add-apt-repository ppa:deadsnakes/ppa && \ | ||
apt-get update && apt-get install -y \ | ||
python3.11 \ | ||
python3.11-dev \ | ||
libgeos-dev \ | ||
libcurl4-openssl-dev \ | ||
libssl-dev \ | ||
binutils \ | ||
curl \ | ||
git \ | ||
autoconf \ | ||
automake \ | ||
libtool \ | ||
python3-pip \ | ||
build-essential \ | ||
wget \ | ||
gcc \ | ||
# Additional dependencies for document handling | ||
libmagic-dev \ | ||
poppler-utils \ | ||
tesseract-ocr \ | ||
libreoffice \ | ||
libpq-dev \ | ||
pandoc && \ | ||
rm -rf /var/lib/apt/lists/* && apt-get clean | ||
|
||
# Register python3.11 as an alternative for /usr/bin/python3 and select it, so the | ||
# later `python3` and `--system` installs use Python 3.11. | ||
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \ | ||
update-alternatives --set python3 /usr/bin/python3.11 | ||
|
||
# Copy only the dependency manifests (lock file, pyproject files, READMEs) before the | ||
# full source, so the dependency-install layer below is rebuilt only when they change. | ||
COPY requirements.lock pyproject.toml README.md ./ | ||
COPY libs/megaparse/pyproject.toml libs/megaparse/README.md libs/megaparse/ | ||
COPY libs/megaparse_sdk/pyproject.toml libs/megaparse_sdk/README.md libs/megaparse_sdk/ | ||
|
||
# Install uv with its installer script. | ||
# NOTE(review): the script is piped straight into sh without pipefail or a pinned | ||
# version, so a failed download would only surface when `uv` is first invoked below. | ||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh | ||
# Presumably the directory the installer places the uv binary in - confirm. | ||
ENV PATH="/root/.local/bin:$PATH" | ||
# Install the locked requirements into the system interpreter (no virtualenv). | ||
RUN uv pip install --no-cache --system -r requirements.lock | ||
|
||
# Install the Playwright browsers together with their OS-level dependencies. | ||
RUN playwright install --with-deps | ||
# Pre-download the NLTK data sets at build time. The module must be run as | ||
# `python3 -m ...`: with a stray `-` before `-m`, Python reads an (empty) script from | ||
# stdin, treats the rest as script arguments and exits without downloading anything. | ||
RUN python3 -m nltk.downloader all | ||
|
||
# FIXME: causes runtime link issues with onnxruntime_pybind_state.cc:507 unstructured | ||
# RUN python3 -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" && \ | ||
# RUN python3 -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')" | ||
|
||
# Copy the rest of the build context into /app. This comes after the dependency | ||
# install so source-only changes do not invalidate the earlier layers. | ||
COPY . . | ||
|
||
# Install the two local packages (megaparse and megaparse_sdk) into the system interpreter. | ||
RUN uv pip install --no-cache --system /app/libs/megaparse /app/libs/megaparse_sdk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.