Skip to content

Commit

Permalink
Local file to S3 upload.
Browse files: browse the repository at this point in the history
  • Loading branch information
hrshdhgd committed Feb 6, 2024
1 parent 2c7d1cb commit 4b643de
Show file tree
Hide file tree
Showing 4 changed files with 608 additions and 366 deletions.
18 changes: 18 additions & 0 deletions example/download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,24 @@
- url: https://drive.google.com/uc?id=10ojJffrPSl12OMcu4gyx0fak2CNu6qOs
local_name: gdrive_test_2.txt

- base_url: https://rest.uniprot.org/uniprotkb/
api: uniprot
url: 'gdrive:1Ai52w4fu4XPu5w4wdE8y3rymEgN0BDqe'
local_name: 'uniprot_genome_features'
fields: [
"organism_id",
"id",
"accession",
"protein_name",
"ec",
"ft_binding"
]
keywords: [
"Reference+proteome"
]
size: 500
batch_size: 1
test: True
# - url: https://www.ebi.ac.uk/chembl/elk/es/
# api: elasticsearch
# query_file: example/query.json
Expand Down
19 changes: 16 additions & 3 deletions kghub_downloader/download_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from google.cloud.storage.blob import Blob
from typing import List, Optional
import gdown
import boto3
from botocore.exceptions import NoCredentialsError

GDOWN_MAP = {"gdrive": "https://drive.google.com/uc?id="}

Expand Down Expand Up @@ -188,9 +190,20 @@ def mirror_to_bucket(local_file, bucket_url, remote_file) -> None:
blob.upload_from_filename(local_file)

elif bucket_url.startswith("s3://"):
raise ValueError("Currently, only Google Cloud storage is supported.")
# bashCommand = f"aws s3 cp {outfile} {mirror}"
# subprocess.run(bashCommand.split())
# Create an S3 client
s3 = boto3.client('s3')

try:
# Upload the file
s3.upload_file(local_file, bucket_name, remote_file)
print(f"File {local_file} uploaded to {bucket_name}/{remote_file}")
return True
except FileNotFoundError:
print(f"The file {local_file} was not found")
return False
except NoCredentialsError:
print("Credentials not available")
return False

else:
raise ValueError("Currently, only Google Cloud storage is supported.")
Expand Down
Loading

0 comments on commit 4b643de

Please sign in to comment.