From 31ed5837a93330619e0787912579e69f2b3fb9d6 Mon Sep 17 00:00:00 2001
From: Stormy Shippy
Date: Sat, 2 Sep 2023 00:22:16 +0000
Subject: [PATCH] Split to separate steps

---
Notes (free-form text in this section is not part of the commit message):

 * staging-sync.yml runs the sync as separate steps that hand files to each
   other through a per-run directory named after the "date" step output.
 * Step ids must be unique within a job, so only the date step uses
   `id: date`; the mkdir step has its own id.
 * The directory name uses %H%M%S (hour, minute, second). In date(1) the
   lowercase %h, %m and %s mean month name, month number and epoch seconds.
 * The step output is written by appending to "$GITHUB_OUTPUT", which
   replaces the deprecated `::set-output` workflow command.
 * NOTE(review): indentation and blank context lines inside the hunks follow
   the usual layout for these file types - TODO confirm against the tree
   before applying.

 .github/workflows/production-sync.yml |  2 +-
 .github/workflows/staging-sync.yml    | 43 ++++++++++++++++++++++++---
 .vscode/launch.json                   | 25 ++++++++++++++++
 src/bloomreach_products.py            |  2 +-
 src/graphql.py                        |  3 +-
 5 files changed, 68 insertions(+), 7 deletions(-)
 create mode 100644 .vscode/launch.json

diff --git a/.github/workflows/production-sync.yml b/.github/workflows/production-sync.yml
index dd60c7a..0d4d58b 100644
--- a/.github/workflows/production-sync.yml
+++ b/.github/workflows/production-sync.yml
@@ -1,4 +1,4 @@
-name: Staging Catalog Sync
+name: Production Catalog Sync
 
 on:
   # schedule:
diff --git a/.github/workflows/staging-sync.yml b/.github/workflows/staging-sync.yml
index 052b5c3..1367dd0 100644
--- a/.github/workflows/staging-sync.yml
+++ b/.github/workflows/staging-sync.yml
@@ -21,14 +21,49 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements.txt
-      - name: Update all records
+      - name: Get current date for runnum
+        id: date
+        run: echo "date=$(date +'%Y-%m-%d_%H%M%S')" >> "$GITHUB_OUTPUT"
+      - name: Make run data directory
+        id: run_dir
+        run: mkdir ${{ steps.date.outputs.date }}
+      # - name: Update all records
+      #   run: |
+      #     python src/main.py
+      #   env:
+      #     BR_SHOPIFY_URL: ${{ vars.BR_SHOPIFY_URL }} ${{ steps.date.outputs.date }}
+      #     BR_SHOPIFY_PAT: ${{ secrets.BR_SHOPIFY_PAT }}
+      #     BR_ENVIRONMENT_NAME: ${{ vars.BR_ENVIRONMENT_NAME }}
+      #     BR_ACCOUNT_ID: ${{ vars.BR_ACCOUNT_ID }}
+      #     BR_CATALOG_NAME: ${{ vars.BR_CATALOG_NAME }}
+      #     BR_API_TOKEN: ${{ secrets.BR_API_TOKEN }}
+      #     BR_OUTPUT_DIR: "."
+      - name: GraphQL Bulk Operation Export
         run: |
-          python src/main.py
+          python src/graphql.py
         env:
           BR_SHOPIFY_URL: ${{ vars.BR_SHOPIFY_URL }}
           BR_SHOPIFY_PAT: ${{ secrets.BR_SHOPIFY_PAT }}
+          BR_OUTPUT_DIR: "./${{ steps.date.outputs.date }}/"
+      - name: Transform to Shopify aggregated products
+        run: |
+          python src/shopify_products.py --input-file=./${{ steps.date.outputs.date }}/0_shopify_bulk_op.jsonl.gz --output-file=./${{ steps.date.outputs.date }}/1_shopify_products.jsonl.gz
+      - name: Transform to discovery generic products
+        run: |
+          python src/bloomreach_generics.py --input-file=./${{ steps.date.outputs.date }}/1_shopify_products.jsonl.gz --output-file=./${{ steps.date.outputs.date }}/2_bloomreach_base.jsonl.gz --pid-props="handle" --vid-props="sku"
+      - name: Transform to discovery catalog patch
+        run: |
+          python src/bloomreach_products.py --input-file=./${{ steps.date.outputs.date }}/2_bloomreach_base.jsonl.gz --output-file=./${{ steps.date.outputs.date }}/3_bloomreach_products.jsonl.gz --pid-props="handle" --vid-props="sku"
+      - name: Transform to discovery patch
+        run: |
+          python src/patch.py --input-file=./${{ steps.date.outputs.date }}/3_bloomreach_products.jsonl.gz --output-file=./${{ steps.date.outputs.date }}/4_bloomreach_patch.jsonl.gz
+        env:
+          BR_SHOPIFY_URL: ${{ vars.BR_SHOPIFY_URL }}
+      - name: Run feed with discovery patch
+        run: |
+          python src/feed.py --input-file=./${{ steps.date.outputs.date }}/4_bloomreach_patch.jsonl.gz
+        env:
           BR_ENVIRONMENT_NAME: ${{ vars.BR_ENVIRONMENT_NAME }}
           BR_ACCOUNT_ID: ${{ vars.BR_ACCOUNT_ID }}
           BR_CATALOG_NAME: ${{ vars.BR_CATALOG_NAME }}
-          BR_API_TOKEN: ${{ secrets.BR_API_TOKEN }}
-          BR_OUTPUT_DIR: "."
+          BR_API_TOKEN: ${{ secrets.BR_API_TOKEN }}
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..36b9df0
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,25 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true
+        },
+        {
+            "name": "Python: Bloomreach Products",
+            "type": "python",
+            "request": "launch",
+            "program": "src/bloomreach_products.py",
+            "console": "integratedTerminal",
+            "args": ["--input-file", "data/20230830_181453_3452125708561_2_generic_products.jsonl", "--output-file", "data/test/2_generic_products.jsonl", "--shopify-url", "example.com"],
+            "justMyCode": true
+        }
+    ]
+}
\ No newline at end of file
diff --git a/src/bloomreach_products.py b/src/bloomreach_products.py
index 19ffa0d..0625ca9 100644
--- a/src/bloomreach_products.py
+++ b/src/bloomreach_products.py
@@ -1,7 +1,7 @@
+import logging
 import gzip
 import json
 import jsonlines
-import logging
 
 logger = logging.getLogger(__name__)
 
diff --git a/src/graphql.py b/src/graphql.py
index ea32f59..dab27f4 100644
--- a/src/graphql.py
+++ b/src/graphql.py
@@ -131,7 +131,8 @@ def get_shopify_jsonl_fp(shop_url, api_version, token, output_dir, run_num=""):
     jsonl_url = context["url"]
 
     job_id_short = job_id.split('/')[-1]
-    jsonl_fp = output_dir + "/" + run_num + "_" + job_id_short + "_0_shopify_bulk_op.jsonl.gz"
+
+    jsonl_fp = output_dir + "/0_shopify_bulk_op.jsonl.gz"
     logger.info("Saving jsonl file to: %s", jsonl_fp)
 
     download_file(jsonl_url, jsonl_fp)