refactor: align repo with new ETL template

cape-ph · Sep 12, 2024 · afc00c1
1 parent 3f9aa03
commit afc00c1
Show file tree

Hide file tree

Showing 8 changed files with 125 additions and 16 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,5 @@
+root = true
+[*]
+indent_style = space
+indent_size = 4
+max_line_length = 80
diff --git a/.github/workflows/cape.yml b/.github/workflows/cape.yml
@@ -0,0 +1,15 @@
+name: CAPE
+on:
+    push:
+        branches: [main]
+    pull_request:
+
+jobs:
+    python:
+        name: Python
+        uses: cape-ph/.github/.github/workflows/python_checks.yml@main
+        with:
+            pytest: false
+    general:
+        name: General
+        uses: cape-ph/.github/.github/workflows/general_checks.yml@main
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,14 @@
+name: Release
+
+on:
+    push:
+        tags: ["**"]
+    workflow_dispatch:
+
+permissions:
+    contents: write
+
+jobs:
+    Release:
+        uses: cape-ph/.github/.github/workflows/release.yml@main
+        secrets: inherit
diff --git a/.gitignore b/.gitignore
@@ -154,9 +154,62 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+# JetBrains
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+.idea/**/aws.xml
+.idea/**/contentModel.xml
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+.idea/**/gradle.xml
+.idea/**/libraries
+cmake-build-*/
+.idea/**/mongoSettings.xml
+*.iws
+out/
+.idea_modules/
+atlassian-ide-plugin.xml
+.idea/replstate.xml
+.idea/sonarlint/
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+.idea/httpRequests
+.idea/caches/build_file_checksums.ser
+
+# Vagrant/Ansible/CI
+.vagrant
+playbook.retry
+.vault
+ansible.out
+
+# General
+*.dat
+*.out
+*.pid
+*.gz
+*.tmp
+*.bak
+*.swp
+*.csv#
+logs/
+build/
+
+# OS
+.DS_Store
+ehthumbs.db
+Icon?
+Thumbs.db
+
+# Other
+sslcerts.pfx
+scripts/*gz
diff --git a/.prettierrc.yaml b/.prettierrc.yaml
@@ -0,0 +1,8 @@
+proseWrap: always
+tabWidth: 4
+useTabs: false
+
+overrides:
+    - files: "*.md"
+      options:
+          parser: "markdown"
diff --git a/etl_gphl_sequencing_alert.py → main.py b/etl_gphl_sequencing_alert.py → main.py
@@ -3,6 +3,7 @@
 import io
 import sys
 from datetime import datetime
+from pathlib import Path
 
 import boto3 as boto3
 from awsglue.context import GlueContext
@@ -42,7 +43,7 @@
 # NOTE: for now we'll take the alert object key and change out the file
 #       extension for the clean data (leaving all namespacing and such). this
 #       will probably need to change
-clean_obj_key = alert_obj_key.replace(".pdf", ".csv")
+clean_obj_key = str(Path(alert_obj_key).with_suffix(".csv"))
 
 # NOTE: May need some creds here
 s3_client = boto3.client("s3")
@@ -79,13 +80,13 @@
     reader = PdfReader(f)
     page = reader.pages[0]
     date_reported = page.extract_text().split("\n")[3].strip()
-    datetime.strptime(date_reported,'%m/%d/%Y')
+    datetime.strptime(date_reported, "%m/%d/%Y")
 except ValueError as err:
     err_message = (
-            f"ERROR - Could not properly read sequencing report date. "
-            f"ETL will continue."
-            f"{err}"
-        )
+        f"ERROR - Could not properly read sequencing report date. "
+        f"ETL will continue."
+        f"{err}"
+    )
 
     logger.error(err_message)
 
@@ -98,10 +99,10 @@
     genes = tables[1]
 except (IndexError, KeyError) as err:
     err_message = (
-            f"ERROR - Could not properly read sequencing PDF tables. "
-            f"ETL Cannot continue."
-            f"{err}"
-        )
+        f"ERROR - Could not properly read sequencing PDF tables. "
+        f"ETL Cannot continue."
+        f"{err}"
+    )
 
     logger.error(err_message)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,9 @@
+[tool.black]
+line-length = 80
+
+[tool.isort]
+profile = "black"
+line_length = 80
+
+[tool.ruff]
+line-length = 80
diff --git a/pyrightconfig.json b/pyrightconfig.json
@@ -0,0 +1,4 @@
+{
+    "autoImportCompletions": true,
+    "typeCheckingMode": "basic"
+}