From 6c61d2baa804ff636eeaae91023743aa1162f9c7 Mon Sep 17 00:00:00 2001 From: judynah Date: Mon, 2 Dec 2024 12:42:09 +0100 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Removed=20blank=20line?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 7 ++ docker/Dockerfile | 86 +++++++++---------- docker/docker-compose.yml | 42 ++++----- .../prefect/tasks/business_core.py | 2 +- 4 files changed, 72 insertions(+), 65 deletions(-) diff --git a/.gitignore b/.gitignore index 58e12c8ad..c8c70569a 100644 --- a/.gitignore +++ b/.gitignore @@ -190,3 +190,10 @@ profiles.yaml # Rye .rye/ + +#LAPP +data-platform-infra/ + +test.py + +data/ diff --git a/docker/Dockerfile b/docker/Dockerfile index c0351a093..0215ad036 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -92,64 +92,64 @@ USER ${USER} ### viadot-azure ### #################### -FROM base as viadot-azure +# FROM base as viadot-azure -ARG INSTALL_DATABRICKS=false +# ARG INSTALL_DATABRICKS=false -# Databricks source setup -RUN if [ "$INSTALL_DATABRICKS" = "true" ]; then \ - apt-get update && \ - apt-get install -y wget apt-transport-https && \ - mkdir -p /etc/apt/keyrings && \ - wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | tee /etc/apt/keyrings/adoptium.asc && \ - echo "deb [signed-by=/etc/apt/keyrings/adoptium.asc] https://packages.adoptium.net/artifactory/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | tee /etc/apt/sources.list.d/adoptium.list && \ - apt-get update && \ - apt-get install -y temurin-11-jdk && \ - find /usr/bin/java -type d -exec chmod 777 {} \; ; \ - fi +# # Databricks source setup +# RUN if [ "$INSTALL_DATABRICKS" = "true" ]; then \ +# apt-get update && \ +# apt-get install -y wget apt-transport-https && \ +# mkdir -p /etc/apt/keyrings && \ +# wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | tee /etc/apt/keyrings/adoptium.asc && \ +# echo "deb [signed-by=/etc/apt/keyrings/adoptium.asc] https://packages.adoptium.net/artifactory/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | tee /etc/apt/sources.list.d/adoptium.list && \ +# apt-get update && \ +# apt-get install -y temurin-11-jdk && \ +# find /usr/bin/java -type d -exec chmod 777 {} \; ; \ +# fi -ENV SPARK_HOME /usr/local/lib/python3.10/site-packages/pyspark +# ENV SPARK_HOME /usr/local/lib/python3.10/site-packages/pyspark -ARG INSTALL_DATABRICKS=false +# ARG INSTALL_DATABRICKS=false -# Turn off package manager caches. -ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} -ENV UV_NO_CACHE=${UV_NO_CACHE} +# # Turn off package manager caches. +# ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} +# ENV UV_NO_CACHE=${UV_NO_CACHE} -RUN rye lock --reset --features azure --pyproject viadot/pyproject.toml -RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt -RUN pip install -r ./viadot/requirements.txt +# RUN rye lock --reset --features azure --pyproject viadot/pyproject.toml +# RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt +# RUN pip install -r ./viadot/requirements.txt -RUN if [ "$INSTALL_DATABRICKS" = "true" ]; then \ - pip install ./viadot/.[databricks]; \ - fi +# RUN if [ "$INSTALL_DATABRICKS" = "true" ]; then \ +# pip install ./viadot/.[databricks]; \ +# fi -# Dependecy install -RUN pip install ./viadot/.[azure] +# # Dependecy install +# RUN pip install ./viadot/.[azure] -# Cleanup. -RUN rm -rf ./viadot +# # Cleanup. +# RUN rm -rf ./viadot -USER ${USER} +# USER ${USER} -################## -### viadot-aws ### -################## +# ################## +# ### viadot-aws ### +# ################## -FROM base as viadot-aws +# FROM base as viadot-aws -# Turn off package manager caches. -ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} -ENV UV_NO_CACHE=${UV_NO_CACHE} +# # Turn off package manager caches. +# ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} +# ENV UV_NO_CACHE=${UV_NO_CACHE} -RUN rye lock --reset --features aws --pyproject viadot/pyproject.toml -RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt -RUN pip install -r ./viadot/requirements.txt +# RUN rye lock --reset --features aws --pyproject viadot/pyproject.toml +# RUN sed '/-e/d' ./viadot/requirements.lock > ./viadot/requirements.txt +# RUN pip install -r ./viadot/requirements.txt -RUN pip install ./viadot/.[aws] +# RUN pip install ./viadot/.[aws] -# Cleanup. -RUN rm -rf ./viadot +# # Cleanup. +# RUN rm -rf ./viadot -USER ${USER} +# USER ${USER} diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 3ff4b0976..116ec8582 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -2,7 +2,7 @@ version: "3" services: viadot-lite: - image: ghcr.io/dyvenia/viadot/viadot-lite:latest + image: ghcr.io/dyvenia/viadot/viadot-lite:2.1.26 container_name: viadot-lite volumes: # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect @@ -11,23 +11,23 @@ services: shm_size: "4gb" entrypoint: sh ./docker/entrypoint.sh restart: "unless-stopped" - viadot-azure: - image: ghcr.io/dyvenia/viadot/viadot-azure:latest - container_name: viadot-azure - volumes: - # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect - # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml - - ../:/home/viadot - shm_size: "4gb" - entrypoint: sh ./docker/entrypoint.sh - restart: "unless-stopped" - viadot-aws: - image: ghcr.io/dyvenia/viadot/viadot-aws:latest - container_name: viadot-aws - volumes: - # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect - # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml - - ../:/home/viadot - shm_size: "4gb" - entrypoint: sh ./docker/entrypoint.sh - restart: "unless-stopped" + # viadot-azure: + # image: ghcr.io/dyvenia/viadot/viadot-azure:latest + # container_name: viadot-azure + # volumes: + # # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect + # # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml + # - ../:/home/viadot + # shm_size: "4gb" + # entrypoint: sh ./docker/entrypoint.sh + # restart: "unless-stopped" + # viadot-aws: + # image: ghcr.io/dyvenia/viadot/viadot-aws:latest + # container_name: viadot-aws + # volumes: + # # - ${HOME}/.databricks-connect:/home/viadot/.databricks-connect + # # - ${HOME}/.config/viadot/config.yaml:/home/viadot/.config/viadot/config.yaml + # - ../:/home/viadot + # shm_size: "4gb" + # entrypoint: sh ./docker/entrypoint.sh + # restart: "unless-stopped" diff --git a/src/viadot/orchestration/prefect/tasks/business_core.py b/src/viadot/orchestration/prefect/tasks/business_core.py index 19f3e59bf..df7ce55ba 100644 --- a/src/viadot/orchestration/prefect/tasks/business_core.py +++ b/src/viadot/orchestration/prefect/tasks/business_core.py @@ -64,4 +64,4 @@ def business_core_to_df( f"Successfully downloaded {nrows} rows and {ncols} columns of data to a DataFrame." ) - return df \ No newline at end of file + return df