Skip to content

Commit

Permalink
add delete_downloaded_files option
Browse files Browse the repository at this point in the history
  • Loading branch information
ladrians committed Aug 30, 2024
1 parent 594760c commit e760996
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
2 changes: 2 additions & 0 deletions amazon_s3/s3_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ s3: # contact the provider for the following information
excluded_exts: # list of excluded extensions, by default it is suggested to include the following: raw, metadata
- !!str 'metadata'
- !!str 'raw'
verbose: !!bool True|False (default) # Log additional detail (useful for troubleshooting)
delete_downloaded_files: !!bool True|False (default) # Delete the locally downloaded files once the ingestion run has processed them
saia:
base_url: !!str 'string' # GeneXus Enterprise AI Base URL
api_token: !!str 'string'
Expand Down
8 changes: 8 additions & 0 deletions saia_ingest/ingestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ def ingest_s3(
source_doc_id = s3_level.get('source_doc_id', None)
download_dir = s3_level.get('download_dir', None)
verbose = s3_level.get('verbose', False)
delete_downloaded_files = s3_level.get('delete_downloaded_files', False)

# Saia
saia_level = config.get('saia', {})
Expand Down Expand Up @@ -560,6 +561,13 @@ def ingest_s3(
file_path = os.path.dirname(file_paths[0])
shutil.rmtree(file_path)

if delete_downloaded_files and len(file_paths) > 0:
for file in file_paths:
try:
os.remove(file)
except Exception as e:
logging.getLogger().error(f"Error deleting file {file}: {e}")

logging.getLogger().info(f"Success: {success_count} Skip: {loader.skip_count}")
logging.getLogger().info(f"Upload Failed: {failed_count} Download Failed: {loader.error_count}")
logging.getLogger().info(f"Total: {loader.total_count}")
Expand Down

0 comments on commit e760996

Please sign in to comment.