From afc00c1d7a42d4707c19e453e02eaef0c9f031ea Mon Sep 17 00:00:00 2001 From: Micah Halter Date: Thu, 12 Sep 2024 09:49:57 -0400 Subject: [PATCH] refactor: align repo with new ETL template --- .editorconfig | 5 ++ .github/workflows/cape.yml | 15 ++++++ .github/workflows/release.yml | 14 ++++++ .gitignore | 65 ++++++++++++++++++++++--- .prettierrc.yaml | 8 +++ etl_gphl_sequencing_alert.py => main.py | 21 ++++---- pyproject.toml | 9 ++++ pyrightconfig.json | 4 ++ 8 files changed, 125 insertions(+), 16 deletions(-) create mode 100644 .editorconfig create mode 100644 .github/workflows/cape.yml create mode 100644 .github/workflows/release.yml create mode 100644 .prettierrc.yaml rename etl_gphl_sequencing_alert.py => main.py (92%) create mode 100644 pyproject.toml create mode 100644 pyrightconfig.json diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d9aeb5b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +root = true +[*] +indent_style = space +indent_size = 4 +max_line_length = 80 diff --git a/.github/workflows/cape.yml b/.github/workflows/cape.yml new file mode 100644 index 0000000..273b936 --- /dev/null +++ b/.github/workflows/cape.yml @@ -0,0 +1,15 @@ +name: CAPE +on: + push: + branches: [main] + pull_request: + +jobs: + python: + name: Python + uses: cape-ph/.github/.github/workflows/python_checks.yml@main + with: + pytest: false + general: + name: General + uses: cape-ph/.github/.github/workflows/general_checks.yml@main diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8fc79f3 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,14 @@ +name: Release + +on: + push: + tags: ["**"] + workflow_dispatch: + +permissions: + contents: write + +jobs: + Release: + uses: cape-ph/.github/.github/workflows/release.yml@main + secrets: inherit diff --git a/.gitignore b/.gitignore index 82f9275..9175c41 100644 --- a/.gitignore +++ b/.gitignore @@ -154,9 +154,62 @@ dmypy.json # Cython debug symbols cython_debug/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +# JetBrains +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf +.idea/**/aws.xml +.idea/**/contentModel.xml +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml +.idea/**/gradle.xml +.idea/**/libraries +cmake-build-*/ +.idea/**/mongoSettings.xml +*.iws +out/ +.idea_modules/ +atlassian-ide-plugin.xml +.idea/replstate.xml +.idea/sonarlint/ +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties +.idea/httpRequests +.idea/caches/build_file_checksums.ser + +# Vagrant/Ansible/CI +.vagrant +playbook.retry +.vault +ansible.out + +# General +*.dat +*.out +*.pid +*.gz +*.tmp +*.bak +*.swp +*.csv# +logs/ +build/ + +# OS +.DS_Store +ehthumbs.db +Icon? +Thumbs.db + +# Other +sslcerts.pfx +scripts/*gz diff --git a/.prettierrc.yaml b/.prettierrc.yaml new file mode 100644 index 0000000..66fff2e --- /dev/null +++ b/.prettierrc.yaml @@ -0,0 +1,8 @@ +proseWrap: always +tabWidth: 4 +useTabs: false + +overrides: + - files: "*.md" + options: + parser: "markdown" diff --git a/etl_gphl_sequencing_alert.py b/main.py similarity index 92% rename from etl_gphl_sequencing_alert.py rename to main.py index 4ed2a47..4dd75c0 100644 --- a/etl_gphl_sequencing_alert.py +++ b/main.py @@ -3,6 +3,7 @@ import io import sys from datetime import datetime +from pathlib import Path import boto3 as boto3 from awsglue.context import GlueContext @@ -42,7 +43,7 @@ # NOTE: for now we'll take the alert object key and change out the file # extension for the clean data (leaving all namespacing and such). this # will probably need to change -clean_obj_key = alert_obj_key.replace(".pdf", ".csv") +clean_obj_key = str(Path(alert_obj_key).with_suffix(".csv")) # NOTE: May need some creds here s3_client = boto3.client("s3") @@ -79,13 +80,13 @@ reader = PdfReader(f) page = reader.pages[0] date_reported = page.extract_text().split("\n")[3].strip() - datetime.strptime(date_reported,'%m/%d/%Y') + datetime.strptime(date_reported, "%m/%d/%Y") except ValueError as err: err_message = ( - f"ERROR - Could not properly read sequencing report date. " - f"ETL will continue." - f"{err}" - ) + f"ERROR - Could not properly read sequencing report date. " + f"ETL will continue." + f"{err}" + ) logger.error(err_message) @@ -98,10 +99,10 @@ genes = tables[1] except (IndexError, KeyError) as err: err_message = ( - f"ERROR - Could not properly read sequencing PDF tables. " - f"ETL Cannot continue." - f"{err}" - ) + f"ERROR - Could not properly read sequencing PDF tables. " + f"ETL Cannot continue." + f"{err}" + ) logger.error(err_message) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..470b53c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[tool.black] +line-length = 80 + +[tool.isort] +profile = "black" +line_length = 80 + +[tool.ruff] +line-length = 80 diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..c4cb691 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,4 @@ +{ + "autoImportCompletions": true, + "typeCheckingMode": "basic" +}