Skip to content

Commit

Permalink
fix: update to capepy v2
Browse files (browse the repository at this point in the history)
  • Loading branch information
mehalter committed Jan 28, 2025
1 parent 85d33ee commit 0e8fde9
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
20 changes: 10 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@

etl_job = EtlJob()

# `raw` has the contents of the raw file passed into the script
raw = etl_job.get_raw_file()
# `src` has the contents of the source file passed into the script
src = etl_job.get_src_file()

# TODO: Here you want to clean the contents of the `raw` variable
# and produce the "cleaned" content to the `cleaned` variable
cleaned = None
# TODO: Here you want to clean the contents of the `src` variable
# and produce the "cleaned" content to the `sink` variable
sink = None

# TODO: Specify the name of the new clean file
# TODO: Specify the name of the new sink file
# We typically just want to replace the file extension with a new one
# Below is an example of this, update with the correct extension
clean_key = str(Path(etl_job.parameters["OBJECT_KEY"]).with_suffix(".csv"))
sink_key = str(Path(etl_job.parameters["OBJECT_KEY"]).with_suffix(".csv"))

# Put the new cleaned object into the clean bucket
if cleaned is not None:
    etl_job.write_clean_file(cleaned, clean_key=clean_key)
# Put the new object into the sink bucket location
if sink is not None:
    etl_job.write_sink_file(sink, sink_key=sink_key)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
aws-glue-libs @ git+https://github.com/awslabs/aws-glue-libs@9d8293962e6ffc607e5dc328e246f40b24010fa8
boto3==1.34.103
capepy>=1.0.0,<2.0.0
capepy>=2.0.0,<3.0.0
pyspark==3.5.1

0 comments on commit 0e8fde9

Please sign in to comment.