Skip to content

Commit

Permalink
debug method fixed, pcloud repo updated
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Paraskeva committed Dec 5, 2024
1 parent 2df8cfd commit d0ac699
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 59 deletions.
2 changes: 0 additions & 2 deletions publications/2023-neurips/lcdb/analysis/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import pandas as pd
import gzip
from . import deserialize_dataframe


class TracebackExtractor:

Expand Down
23 changes: 10 additions & 13 deletions publications/2023-neurips/lcdb/db/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import pathlib
import re
import trace
import pandas as pd
from lcdb.db._repository import Repository
from lcdb.db._util import get_path_to_lcdb, CountAwareGenerator
Expand Down Expand Up @@ -45,7 +46,7 @@ def create(self, config=None):

# create default config file
default_config = {"repositories": {
"official": "pcloud://kZK9f70Zxwwjkt54zA8FY6kBUFB5PXoAYT9k",
"official": "pcloud://kZeWywZRr6lScWSloHlzwk6Uxq3GyRtuBaX",
"local": ".lcdb/data"}
}
if config is not None:
Expand Down Expand Up @@ -248,30 +249,26 @@ def generator():
if "m:traceback" in df.columns:
traceback_rows = df[df["m:traceback"].notna()]

# extract corresponding configuration parameters
if not traceback_rows.empty:
# print(traceback_rows)
for index, traceback_row in traceback_rows.iterrows():
traceback_str = traceback_row["m:traceback"]
traceback_frame = traceback_row.to_frame().T
traceback_indices = traceback_rows.index.tolist()
config_cols = [c for c in df.columns if c.startswith("p:")]
# corresponding_configs = df.loc[traceback_rows.index]
# configs.append(corresponding_configs)
corresponding_configs_reset = df.loc[traceback_indices, config_cols].drop_duplicates().reset_index(drop=True)
config_cols = [c for c in traceback_frame.columns if c.startswith("p:")]
corresponding_configs_reset = traceback_rows.loc[traceback_indices, config_cols].drop_duplicates().reset_index(drop=True)
configs.append(corresponding_configs_reset)

tracebacks.append(traceback_rows["m:traceback"])

# extract errors from traceback messages str format first
traceback_str = str(traceback_rows["m:traceback"].iloc[0])
try:
error_message = re.search(r'(\w+Error): (.*)', traceback_str).group(0)
except:
error_message = traceback_str
tracebacks.append(traceback_str)
errors.append(error_message)

else:
print("Error: no traceback column in dataframe")

return {
"configs": pd.concat(configs, ignore_index=True) if configs else None,
"tracebacks": pd.concat(tracebacks, ignore_index=True) if tracebacks else None,
"tracebacks": pd.Series(tracebacks) if tracebacks else None,
"errors": pd.Series(errors) if errors else None
}
11 changes: 6 additions & 5 deletions publications/2023-neurips/lcdb/db/_pcloud_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def __init__(self, repo_code, token=None):
self.update_content()

def update_content(self):
self.content = requests.get(f"https://api.pcloud.com/showpublink?code={self.repo_code}").json()
self.content = requests.get(f"https://eapi.pcloud.com/showpublink?code={self.repo_code}").json()


def exists(self):
return self.content is not None and len(self.content) > 0
Expand All @@ -41,7 +42,7 @@ def authenticate(self, username, password, device="lcdbclient", authexpire=300):
:param authexpire: time in seconds after which the received token will expire
:return:
"""
url = f"https://api.pcloud.com/userinfo?getauth=1&logout=1&device={device}&authexpire={authexpire}"
url = f"https://eapi.pcloud.com/userinfo?getauth=1&logout=1&device={device}&authexpire={authexpire}"
response = requests.post(url, {
"username": username,
"password": password
Expand All @@ -53,7 +54,7 @@ def authenticate(self, username, password, device="lcdbclient", authexpire=300):
def read_result_file(self, file, usecols=None):

# get download link
response = requests.get(f"https://api.pcloud.com/getpublinkdownload?code={self.repo_code}&fileid={file}").json()
response = requests.get(f"https://eapi.pcloud.com/getpublinkdownload?code={self.repo_code}&fileid={file}").json()
download_link = "https://" + response["hosts"][0] + response["path"]

# download file
Expand Down Expand Up @@ -100,7 +101,7 @@ def _get_folder_id(self, workflow=None, campaign=None, openmlid=None):

def _create_folder(self, parent_folder_id, name):
response = requests.get(
f"https://api.pcloud.com/createfolder?code={self.repo_code}&auth={self.token}&folderid={parent_folder_id}&name={name}"
f"https://eapi.pcloud.com/createfolder?code={self.repo_code}&auth={self.token}&folderid={parent_folder_id}&name={name}"
).json()
if response is None:
raise ValueError(f"Could not create folder '{name}', received no response")
Expand Down Expand Up @@ -165,7 +166,7 @@ def add_results(self, campaign, *result_files):
csv_buffer.seek(0)

# upload the file
url = f"https://api.pcloud.com/uploadfile?code={self.repo_code}&auth={self.token}&folderid={folder_id}&filename={name}"
url = f"https://eapi.pcloud.com/uploadfile?code={self.repo_code}&auth={self.token}&folderid={folder_id}&filename={name}"
status = requests.post(url, files={'file': (name, csv_buffer, 'application/gzip')}).json()
if not isinstance(status, dict):
raise ValueError(
Expand Down
10 changes: 5 additions & 5 deletions publications/2023-neurips/use cases/1 - lc crossing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:17<00:00, 3.50s/it]\n"
"0it [00:00, ?it/s]\n"
]
}
],
Expand All @@ -38,7 +38,7 @@
" \"lcdb.workflow.sklearn.TreesEnsembleWorkflow\"\n",
"]\n",
"\n",
"openmlid = 11\n",
"openmlid = 12\n",
"workflow = workflows[1]\n",
"\n",
"# retrieve learning curve objects\n",
Expand Down Expand Up @@ -693,9 +693,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python (LCDB)",
"display_name": "lcdb-dev",
"language": "python",
"name": "lcdb"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -707,7 +707,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
"version": "3.10.15"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,18 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:38<00:00, 1.54s/it]\n"
"0it [00:00, ?it/s]\n"
]
},
{
"ename": "AttributeError",
"evalue": "'NoneType' object has no attribute 'columns'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 27\u001b[0m\n\u001b[1;32m 13\u001b[0m df \u001b[38;5;241m=\u001b[39m lcdb\u001b[38;5;241m.\u001b[39mquery(\n\u001b[1;32m 14\u001b[0m openmlids\u001b[38;5;241m=\u001b[39m[openmlid],\n\u001b[1;32m 15\u001b[0m workflows\u001b[38;5;241m=\u001b[39m[workflow],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 23\u001b[0m show_progress\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 24\u001b[0m )\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# group by configurations\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m config_cols \u001b[38;5;241m=\u001b[39m [c \u001b[38;5;28;01mfor\u001b[39;00m c \u001b[38;5;129;01min\u001b[39;00m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m c\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mp:\u001b[39m\u001b[38;5;124m\"\u001b[39m)]\n\u001b[1;32m 28\u001b[0m df \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mgroupby(config_cols)\u001b[38;5;241m.\u001b[39magg({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlearning_curve\u001b[39m\u001b[38;5;124m\"\u001b[39m: merge_curves})\n",
"\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'columns'"
]
}
],
Expand Down Expand Up @@ -63,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "4233002b",
"metadata": {},
"outputs": [],
Expand All @@ -73,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "1d542197",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -106,7 +117,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "1670eeaf",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -140,7 +151,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "lcdb-dev",
"language": "python",
"name": "python3"
},
Expand All @@ -154,7 +165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.15"
}
},
"nbformat": 4,
Expand Down
47 changes: 19 additions & 28 deletions publications/2023-neurips/use cases/6 - oob.ipynb

Large diffs are not rendered by default.

0 comments on commit d0ac699

Please sign in to comment.