Skip to content

Commit

Permalink
Enabled uploads to the pCloud repository.
Browse files Browse the repository at this point in the history
  • Loading branch information
felix committed Oct 24, 2024
1 parent e18453a commit 1ebe67b
Showing 1 changed file with 117 additions and 5 deletions.
122 changes: 117 additions & 5 deletions publications/2023-neurips/lcdb/db/_pcloud_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,31 @@

class PCloudRepository(Repository):

def __init__(self, repo_code, token=None):
    """Repository backed by a publicly shared pCloud folder.

    Args:
        repo_code: the public-link code of the pCloud folder (the ``code``
            query parameter of a pCloud share link).
        token: optional pCloud auth token. Only needed for write operations
            (folder creation, uploads); read access works without it.
    """
    super().__init__()
    self.repo_code = repo_code
    # cached JSON listing of the public link; populated by update_content()
    self.content = None
    self.token = token

    # fetch the current folder listing once at construction time
    self.update_content()

def update_content(self):
    """Refresh the cached listing of the public pCloud link.

    Stores the decoded JSON response in ``self.content``.
    """
    url = f"https://api.pcloud.com/showpublink?code={self.repo_code}"
    response = requests.get(url)
    self.content = response.json()

def exists(self):
return self.content is not None and len(self.content) > 0

def authenticate(self, username, password):
    """Log in to pCloud and store the resulting auth token on the instance.

    Args:
        username: pCloud account name.
        password: pCloud account password.

    Raises:
        ValueError: if the server response contains no ``auth`` token.
    """
    endpoint = "https://api.pcloud.com/userinfo?getauth=1&logout=1&device=lcdbclient"
    credentials = {
        "username": username,
        "password": password
    }
    response = requests.post(endpoint, credentials).json()
    self.token = response.get("auth")
    if self.token is None:
        raise ValueError(f"Authentication failed. Response from server was {response}.")

def read_result_file(self, file, usecols=None):

# get download link
Expand All @@ -50,8 +66,104 @@ def read_result_file(self, file, usecols=None):
else:
print(f"Failed to fetch the file. Status code: {response.status_code}")

def _get_folder_id(self, workflow=None, campaign=None, openmlid=None):
"""
Returns the folderid of the folder containing the data for this context.
Returns None if that folder does not exist
"""

# create query
query = "metadata .contents[? name == 'data'] | [0]"
if workflow is not None:
query += f".contents | [? name == '{workflow}'] | [0]"
if campaign is not None:
query += f".contents | [? name == '{campaign}'] | [0]"
if openmlid is not None:
query += f".contents | [? name == '{openmlid}'] | [0]"

elif openmlid is not None:
raise ValueError("openmlid can be only set if both a workflow and a campaign are given.")

elif campaign is not None or openmlid is not None:
raise ValueError("campaign or openmlid can be only set if a workflow is given.")

return jmespath.compile(f"{query}.folderid").search(self.content)

def _create_folder(self, parent_folder_id, name):
    """Create a sub-folder in the pCloud repository and return its folderid.

    Args:
        parent_folder_id: pCloud folderid of the parent folder.
        name: name of the folder to create.

    Raises:
        ValueError: if the server sends no response or a non-zero result code.
    """
    url = (
        f"https://api.pcloud.com/createfolder?code={self.repo_code}"
        f"&auth={self.token}&folderid={parent_folder_id}&name={name}"
    )
    payload = requests.get(url).json()
    if payload is None:
        raise ValueError(f"Could not create folder '{name}', received no response")
    if "result" not in payload or payload["result"] != 0:
        raise ValueError(f"Could not create folder '{name}', received invalid response: {payload}")
    # the remote listing changed, so refresh the local cache
    self.update_content()
    return payload["metadata"]["folderid"]

def add_results(self, campaign, *result_files):
    """Upload result CSV files into the repository under the given campaign.

    Each result file is split by (workflow, openmlid, seeds) groups and
    uploaded as a gzipped CSV into data/<workflow>/<campaign>/<openmlid>/,
    creating missing folders on the way.

    NOTE(review): the raise below disables this method entirely; everything
    after it is unreachable dead code. Remove the raise once the upload path
    has been verified.
    """
    raise NotImplementedError
    self.update_content()  # make sure that we have the current file structure at pCloud
    for result_file in result_files:

        # read result file (transparently decompress gzip'ed CSVs)
        if result_file.endswith((".gz", ".gzip")):
            with gzip.GzipFile(result_file, "rb") as f:
                df = pd.read_csv(f)
        else:
            df = pd.read_csv(result_file)

        # decompose this dataframe so that we have results only for a single workflow/openmlid and seeds
        for (workflow, openmlid, workflow_seed, valid_seed, test_seed), group in df.groupby(
            ["m:workflow", "m:openmlid", "m:workflow_seed", "m:valid_seed", "m:test_seed"]
        ):
            name = f"{int(workflow_seed)}-{int(test_seed)}-{int(valid_seed)}.csv.gz"
            print(f"Adding results for {workflow}/{campaign}/{openmlid}/{name}")
            folder_id = self._get_folder_id(workflow=workflow, campaign=campaign, openmlid=openmlid)

            # if the folder does not exist, create one (parents first)
            if folder_id is None:

                folder_id_root = self._get_folder_id()

                # create workflow folder if necessary
                folder_id_workflow = self._get_folder_id(workflow=workflow)
                if folder_id_workflow is None:
                    print(f"create workflow folder {workflow} in folder id {folder_id_root}")
                    folder_id_workflow = self._create_folder(parent_folder_id=folder_id_root, name=workflow)

                # create campaign folder if necessary
                folder_id_campaign = self._get_folder_id(workflow=workflow, campaign=campaign)
                if folder_id_campaign is None:
                    print(f"create campaign folder inside {folder_id_workflow}")
                    folder_id_campaign = self._create_folder(parent_folder_id=folder_id_workflow, name=campaign)

                # create dataset folder if necessary
                # NOTE(review): the result is assigned to folder_id (used for the
                # upload below), not to folder_id_dataset — the folder_id_dataset
                # lookup only decides whether creation is needed. Confusing; verify.
                folder_id_dataset = self._get_folder_id(workflow=workflow, campaign=campaign, openmlid=openmlid)
                if folder_id_dataset is None:
                    print(f"create dataset folder inside {folder_id_campaign}")
                    folder_id = self._create_folder(parent_folder_id=folder_id_campaign, name=openmlid)

            # Create a BytesIO object to hold the CSV in binary format
            csv_buffer = io.BytesIO()

            # Write the DataFrame to the buffer in CSV format, but use StringIO first to handle text conversion
            # NOTE(review): this serializes the WHOLE df, not the per-group slice,
            # under a per-group file name, and `group` is unused — looks like a bug
            # (presumably group.to_csv was intended); confirm before enabling.
            csv_string = df.to_csv(index=False)

            # Compress the CSV data using gzip
            with gzip.GzipFile(fileobj=csv_buffer, mode='wb') as gz:
                gz.write(csv_string.encode('utf-8'))  # Compress the CSV string (convert it to bytes first)

            # Reset the buffer's position to the beginning
            csv_buffer.seek(0)

            # upload the file
            url = f"https://api.pcloud.com/uploadfile?code={self.repo_code}&auth={self.token}&folderid={folder_id}&filename={name}"
            status = requests.post(url, files={'file': (name, csv_buffer, 'application/gzip')}).json()
            if not isinstance(status, dict):
                raise ValueError(
                    f"Could not add result. Object received from pCloud should be a dict but is {type(status)}"
                )
            if status["result"] != 0:
                raise ValueError(f"Could not add result. Received an error response from pCloud: {status}")

def get_workflows(self):
    """Return the names of all workflow folders below the top-level 'data' folder."""
    expression = "metadata.contents[? name == 'data'] | [0] .contents | [*].name"
    return jmespath.compile(expression).search(self.content)
Expand Down Expand Up @@ -143,7 +255,7 @@ def get_result_files_of_workflow_in_campaign(

def get_result_files_of_workflow(
self,
workflow,
workflow=None,
campaigns=None,
openmlids=None,
workflow_seeds=None,
Expand Down

0 comments on commit 1ebe67b

Please sign in to comment.