-
Notifications
You must be signed in to change notification settings - Fork 0
/
url_to_google_drive.py
90 lines (70 loc) · 3.31 KB
/
url_to_google_drive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""url_to_google_drive.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1rzlJg7P0g20ejiCuCiYVq7EkMWlmHsJV
# **Download file from given url to Google Drive**
## Mount google drive to notebook
"""
# Mount Google Drive into the Colab runtime at /content/drive so the rest of
# the script can read and write Drive files through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')
"""## Specify File path
- Change data_path to the folder where you want to save the downloaded
file
- E.g. the default folder is named "datasets"; change it to point to another
folder, or remove it to save directly to the Google Drive base path
"""
import os

# Root of the mounted Google Drive and the folder downloads are saved into.
base_path = '/content/drive/My Drive/'
data_path = os.path.join(base_path, "datasets")

# Create the folder with os.makedirs instead of the notebook-only
# `!mkdir -p {data_path}`: the shell form is not valid Python outside
# IPython, and because the path contains a space ("My Drive") the unquoted
# shell command would create the wrong directories.
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(data_path, exist_ok=True)
"""## Downloading file
- Uses requests to get the file
- Uses tqdm to show download progress
"""
import requests
from tqdm.notebook import tqdm
def save_to_drive(url, file_path, *, drive_root="/content/drive/My Drive",
                  chunk_size=1024, timeout=30):
    """
    Downloads a file from a given URL and saves it to Google Drive.

    The parent folder of the target file is created if it does not exist.
    Download progress is shown with a tqdm progress bar.

    Args:
        url (str): The URL of the file to download.
        file_path (str): The path within Google Drive to save the file,
            including the filename and extension
            (e.g., "datasets/indian_food_dataset.zip").
        drive_root (str): Folder the Drive is mounted under; ``file_path``
            is resolved relative to it.
        chunk_size (int): Number of bytes requested per streamed chunk.
        timeout (float | tuple): Timeout in seconds passed to
            ``requests.get``.

    Returns:
        None. Failures (HTTP errors, network errors, file-system errors) are
        reported with ``print`` instead of being raised.
    """
    # Resolve the target once so the folder that gets created and the file
    # that gets opened always live under the same root. A leading "/" is
    # stripped so os.path.join cannot discard drive_root.
    target_path = os.path.join(drive_root, file_path.lstrip("/"))

    try:
        parent_dir = os.path.dirname(target_path)
        if parent_dir:
            # Pure-Python replacement for `!mkdir -p`; safe for paths that
            # contain spaces and a no-op when the folder already exists.
            os.makedirs(parent_dir, exist_ok=True)

        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Stop on 4xx/5xx instead of saving the error page as the file.
            response.raise_for_status()

            total_size = int(response.headers.get('content-length', 0))
            if total_size == 0:
                print("Content-Length not available, progress bar might not be accurate.")

            # total=None tells tqdm the size is unknown (counter only).
            with open(target_path, "wb") as file, tqdm(
                total=total_size or None,
                unit='iB',
                unit_scale=True,
                position=0,
                leave=True,
            ) as progress:
                for data in response.iter_content(chunk_size):
                    progress.update(len(data))
                    file.write(data)
                downloaded = progress.n

        # Only meaningful when the server announced a size up front.
        if total_size != 0 and downloaded != total_size:
            print("ERROR, something went wrong")
    except FileNotFoundError:
        print("\nFolder doesn't exist! Create filepath in gdrive")
    except Exception as e:
        # Deliberate best-effort boundary for notebook use: report the
        # problem and return rather than aborting the cell with a traceback.
        print(e)
"""## Use
- **url**: the URL to download (must be a publicly accessible URL)
- Call the **save_to_drive** function with params (url: string, file_path: string)
- **file_path**: must point to a folder location inside My Drive in Google Drive and include the filename with its extension
(e.g. datasets/food.zip)
"""
# Example usage: download an archive into "datasets/" on the mounted Drive.
# NOTE(review): this is a signed link; its query string carries
# X-Goog-Date=20240703T171808Z and X-Goog-Expires=259200, so it presumably
# stops working about three days after that date. Replace it with a fresh
# URL before running.
url = "https://storage.googleapis.com/kaggle-data-sets/1864/33884/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240703%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240703T171808Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=b7d13b4432d0af7f442c96dc351b4a24938a009a1117c4af8288ec9f1c9f5724563f8d6bec2e9d3b40b030690d23658282ee293b2759faba18c5c02ae2a3888129a33f4b537e782de8441e1fc6ea4fba0ddebf8de7af0d36f7c3247aeb03345051a0a7bfb6b41aa4e8e0a6e7a097876654715d4fdb3e951916aead3394a2cb7c89ffc4e1a8a4da57308e46b8ea9e0b8018717d796b22b9e18435e1df3b1df2e8a6a606afd4cecadfdc1fb7d101092ea3922284934a40caea0c88136b52976cfe93fc11153f935cd8a4538bb9426aefcfbef4401ab3da11668f9e8828bd9678326acc7e3245e3a282e4d7425b62b2ca46feeae535bada70441f04fb8e41948dbe"
save_to_drive(url, "datasets/food_data_6903.zip")