Skip to content

Commit

Permalink
Merge pull request #35 from monarch-initiative/gh-asset-download
Browse files Browse the repository at this point in the history
Enable Github Release Asset download
  • Loading branch information
hrshdhgd authored Jul 31, 2024
2 parents 234df9c + 78c28e6 commit bcb9f8a
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Available options are:
- Google Cloud Storage (`gs://`)
- Google Drive (`gdrive://` or https://drive.google.com/...). The file must be publicly accessible.
- Amazon AWS S3 bucket (`s3://`)
- GitHub Release Assets (`git://RepositoryOwner/RepositoryName/AssetName`)
- **local_name**: The name to save the file as locally
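- **tag**: A tag to use to filter downloads. For GitHub Release Assets, it also names the release that is searched first for the asset; other releases are searched if the asset is not found there.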
- **api**: The API to use to download the file. Currently supported: `elasticsearch`
Expand Down
4 changes: 4 additions & 0 deletions example/download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
- url: s3://monarch-kg-test/kghub_downloader_test_file.yaml
local_name: test_file.yaml

- url: git://Knowledge-Graph-Hub/kg-microbe/testfile.zip
tag: v0.0.1
local_name: testfile.zip

# - url: https://www.ebi.ac.uk/chembl/elk/es/
# api: elasticsearch
# query_file: example/query.json
Expand Down
61 changes: 60 additions & 1 deletion kghub_downloader/download_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from fnmatch import fnmatch
from ftplib import error_perm
from multiprocessing.sharedctypes import Value
import sys
from typing import List, Optional
from urllib.error import URLError
from urllib.request import Request, urlopen
Expand All @@ -16,6 +17,7 @@
import elasticsearch
import elasticsearch.helpers
import gdown
import requests
import yaml
from botocore.exceptions import NoCredentialsError
from google.cloud import storage
Expand Down Expand Up @@ -146,6 +148,61 @@ def download_from_yaml(
else:
# If the loop completes without breaking (i.e., no match found), throw an error
raise ValueError("Invalid URL")
elif url.startswith("git://"):
url_split = url.split("/")
repo_owner = url_split[-3]
repo_name = url_split[-2]
asset_name = url_split[-1]
asset_url = None
api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/releases"
# Get the list of releases
response = requests.get(api_url)
response.raise_for_status()
releases = response.json()

if not releases:
print("No releases found for this repository.")
sys.exit(1)

# Check if a specific tag is provided
if "tag" in item:
# Find the release with the specified tag
tagged_release = next(
(
release
for release in releases
if release["tag_name"] == item["tag"]
),
None,
)
if tagged_release:
for asset in tagged_release.get("assets", []):
if asset["name"] == asset_name:
asset_url = asset["browser_download_url"]
break

# If no asset found in the specified tag or no tag provided, check other releases
if not asset_url:
for release in releases:
for asset in release.get("assets", []):
if asset["name"] == asset_name:
asset_url = asset["browser_download_url"]
break
if asset_url:
break

if not asset_url:
print(f"Asset '{asset_name}' not found in any release.")
sys.exit(1)

# Download the asset
response = requests.get(asset_url, stream=True)
response.raise_for_status()
with open(outfile, "wb") as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
print(f"Downloaded {asset_name}")

else:
req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
try:
Expand Down Expand Up @@ -322,7 +379,9 @@ def download_via_ftp(ftp_server, current_dir, local_dir, glob_pattern=None):
items = ftp_server.nlst()

# Initialize tqdm progress bar
with tqdm(total=len(items), desc=f"Downloading from {current_dir} via ftp") as pbar:
with tqdm(
total=len(items), desc=f"Downloading from {current_dir} via ftp"
) as pbar:
for item in items:
# Check if the item is a directory
if is_directory(ftp_server, item):
Expand Down
1 change: 1 addition & 0 deletions test/integration/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def test_download():
"test/output/test_file.yaml",
"test/output/gdrive_test_1.txt",
"test/output/gdrive_test_2.txt",
"test/output/testfile.zip",
]

for file in files:
Expand Down

0 comments on commit bcb9f8a

Please sign in to comment.