Skip to content

Commit

Permalink
[ENH] Load tsc problems from zenodo as a back up to tsc.com (#1531)
Browse files Browse the repository at this point in the history
* zenodo

* add IDs

* add IDs

* add IDs

* add MTSC IDs

* add MTSC IDs
  • Loading branch information
TonyBagnall authored May 17, 2024
1 parent cdf7370 commit 5831a2d
Show file tree
Hide file tree
Showing 2 changed files with 200 additions and 17 deletions.
53 changes: 36 additions & 17 deletions aeon/datasets/_data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
get_downloaded_tsc_tsr_datasets,
get_downloaded_tsf_datasets,
)
from aeon.datasets.tsc_datasets import tsc_zenodo
from aeon.datasets.tser_datasets import tser_monash, tser_soton
from aeon.utils.conversion import convert_collection

Expand Down Expand Up @@ -1176,7 +1177,7 @@ def load_regression(
# Check the status code of the response
if response.status != 200:
try_monash = True
except HTTPError:
except (URLError, HTTPError):
# An HTTP or URL error here might mean the file does not exist
try_monash = True
else:
Expand Down Expand Up @@ -1344,30 +1345,48 @@ def load_classification(
url = f"https://timeseriesclassification.com/aeon-toolkit/{name}.zip"
# Test if file exists to generate more informative error
req = Request(url, method="HEAD")
msg = (
try_zenodo = False
error_str = (
f"Invalid dataset name ={name} that is not available on extract path "
f"={extract_path}. Nor is it available on "
f"https://timeseriesclassification.com/."
f"https://timeseriesclassification.com/ or zenodo."
)
try:
# Perform the request
response = urlopen(req, timeout=60)
# Check the status code of the response; anything other than 200 means fall back
if response.status != 200:
raise ValueError(msg)
except Exception as e:
raise e
try:
_download_and_extract(
url,
extract_path=extract_path,
)
except zipfile.BadZipFile:
raise ValueError(
f"Invalid dataset name ={name} is available on extract path ="
f"{extract_path} or https://timeseriesclassification.com/ but it "
f"is not correctly formatted.",
)
try_zenodo = True
except (URLError, HTTPError):
# An HTTP or URL error here might mean the file does not exist
try_zenodo = True
else:
try:
_download_and_extract(
url,
extract_path=extract_path,
)
except zipfile.BadZipFile:
try_zenodo = True
if try_zenodo:
# Try on ZENODO
if name in tsc_zenodo.keys():
id = tsc_zenodo[name]
url_train = f"https://zenodo.org/record/{id}/files/{name}_TRAIN.ts"
url_test = f"https://zenodo.org/record/{id}/files/{name}_TEST.ts"
full_path = os.path.join(path, name)
if not os.path.exists(full_path):
os.makedirs(full_path)
train_save = f"{full_path}/{name}_TRAIN.ts"
test_save = f"{full_path}/{name}_TEST.ts"
try:
urlretrieve(url_train, train_save)
urlretrieve(url_test, test_save)
except Exception:
raise ValueError(error_str)
else:
raise ValueError(error_str)

# Test for discrete version (first suffix _disc), always use that if it exists
dir_name = name
# If there exists a version with _discr, load that
Expand Down
164 changes: 164 additions & 0 deletions aeon/datasets/tsc_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,170 @@
"JapaneseVowels",
"SpokenArabicDigits",
}

# 158 tsml time series classification problems
# Maps each dataset name to the numeric ID of its Zenodo record. The loader in
# aeon/datasets/_data_loaders.py interpolates the ID and the name into
# https://zenodo.org/record/{id}/files/{name}_TRAIN.ts (and ..._TEST.ts) when it
# falls back to Zenodo, so each key must match the file-name stem used there.
tsc_zenodo = {
"ACSF1": 11184893,
"Adiac": 11179788,
"AllGestureWiimoteX": 11185036,
"AllGestureWiimoteY": 11185107,
"AllGestureWiimoteZ": 11185136,
"ArrowHead": 11185163,
"Beef": 11185190,
"BeetleFly": 11185218,
"BirdChicken": 11185259,
"BME": 11185291,
"Car": 11185322,
"CBF": 11186181,
"Chinatown": 11186207,
"ChlorineConcentration": 11186229,
"CinCECGTorso": 11186247,
"Coffee": 11186266,
"Computers": 11186293,
"CricketX": 11186304,
"CricketY": 11186320,
"CricketZ": 11186333,
"Crop": 11186344,
"DiatomSizeReduction": 11186365,
"DistalPhalanxOutlineAgeGroup": 11186386,
"DistalPhalanxOutlineCorrect": 11186597,
"DistalPhalanxTW": 11186610,
"DodgerLoopDay": 11186618,
"DodgerLoopGame": 11186628,
"DodgerLoopWeekend": 11186647,
"Earthquakes": 11186659,
"ECG200": 11186675,
"ECG5000": 11186692,
"ECGFiveDays": 11186702,
"ElectricDevices": 11190880,
"EOGHorizontalSignal": 11190930,
"EOGVerticalSignal": 11190951,
"EthanolLevel": 11190985,
"FaceAll": 11191011,
"FaceFour": 11191042,
"FacesUCR": 11191065,
"FiftyWords": 11191097,
"Fish": 11191141,
"FordA": 11191164,
"FordB": 11191172,
"FreezerRegularTrain": 11191184,
"FreezerSmallTrain": 11191211,
"Fungi": 11191230,
"GestureMidAirD1": 11197478,
"GestureMidAirD2": 11197490,
"GestureMidAirD3": 11197504,
"GesturePebbleZ1": 11197515,
"GesturePebbleZ2": 11197520,
"GunPoint": 11191244,
"GunPointAgeSpan": 11194425,
"GunPointMaleVersusFemale": 11194429,
"GunPointOldVersusYoung": 11194437,
"Ham": 11197526,
"HandOutlines": 11197528,
"Haptics": 11197538,
"Herring": 11197540,
"HouseTwenty": 11197555,
"InlineSkate": 11197575,
"InsectEPGRegularTrain": 11197587,
"InsectEPGSmallTrain": 11197608,
"InsectWingbeatSound": 11197635,
"ItalyPowerDemand": 11197656,
"LargeKitchenAppliances": 11197689,
"Lightning2": 11197697,
"Lightning7": 11197706,
"Mallat": 11197731,
"Meat": 11197742,
"MedicalImages": 11197752,
"MelbournePedestrian": 11197762,
"MiddlePhalanxOutlineAgeGroup": 11197771,
"MiddlePhalanxOutlineCorrect": 11197782,
"MiddlePhalanxTW": 11197799,
"MixedShapesRegularTrain": 11197803,
"MixedShapesSmallTrain": 11197811,
"MoteStrain": 11197817,
# NOTE(review): same record ID as "MoteStrain" directly above. Every other entry
# has a distinct ID, so this looks like a copy/paste slip -- confirm on Zenodo.
"NonInvasiveFetalECGThorax1": 11197817,
"NonInvasiveFetalECGThorax2": 11197831,
"OliveOil": 11197843,
"OSULeaf": 11197848,
"PhalangesOutlinesCorrect": 11197875,
"Phoneme": 11197891,
"PickupGestureWiimoteZ": 11197898,
"PigAirwayPressure": 11197911,
"PigArtPressure": 11197920,
"PigCVP": 11197924,
"PLAID": 11197936,
"Plane": 11197940,
"PowerCons": 11197948,
"ProximalPhalanxOutlineAgeGroup": 11197960,
"ProximalPhalanxOutlineCorrect": 11197968,
"ProximalPhalanxTW": 11197973,
"RefrigerationDevices": 11197996,
"Rock": 11198001,
"ScreenType": 11198182,
"SemgHandGenderCh2": 11198193,
"SemgHandMovementCh2": 11198197,
"SemgHandSubjectCh2": 11198203,
"ShakeGestureWiimoteZ": 11198219,
"ShapeletSim": 11198235,
"ShapesAll": 11198237,
"SmallKitchenAppliances": 11198251,
"SmoothSubspace": 11198271,
"SonyAIBORobotSurface1": 11198277,
"SonyAIBORobotSurface2": 11198290,
"StarLightCurves": 11198308,
"Strawberry": 11198313,
"SwedishLeaf": 11198315,
"Symbols": 11198322,
"SyntheticControl": 11198330,
"ToeSegmentation1": 11198338,
"ToeSegmentation2": 11198342,
"Trace": 11198344,
"TwoLeadECG": 11198352,
"TwoPatterns": 11198356,
"UMD": 11198362,
"UWaveGestureLibraryAll": 11198366,
"UWaveGestureLibraryX": 11198374,
"UWaveGestureLibraryY": 11198382,
"UWaveGestureLibraryZ": 11198384,
"Wafer": 11198387,
"Wine": 11198391,
"WordSynonyms": 11198396,
"Worms": 11198402,
"WormsTwoClass": 11198406,
"Yoga": 11198408,
# NOTE(review): the remaining 30 entries appear to be the multivariate (MTSC)
# problems, added after the 128 entries above -- confirm.
"ArticularyWordRecognition": 11204924,
"AtrialFibrillation": 11206175,
"BasicMotions": 11206179,
"CharacterTrajectories": 11206183,
"Cricket": 11206185,
"DuckDuckGeese": 11206189,
"EigenWorms": 11206196,
"Epilepsy": 11206204,
"EthanolConcentration": 11206212,
"ERing": 11206210,
"FaceDetection": 11206216,
"FingerMovements": 11206220,
"HandMovementDirection": 11206224,
"Handwriting": 11206227,
"Heartbeat": 11206229,
"InsectWingbeat": 11206234,
"JapaneseVowels": 11206237,
"Libras": 11206239,
"LSST": 11206243,
"MotorImagery": 11206246,
"NATOPS": 11206248,
"PenDigits": 11206259,
"PEMS-SF": 11206252,
"PhonemeSpectra": 11206261,
"RacketSports": 11206263,
"SelfRegulationSCP1": 11206265,
"SelfRegulationSCP2": 11206269,
"SpokenArabicDigits": 11206274,
"StandWalkJump": 11206278,
"UWaveGestureLibrary": 11206282,
}


# TODO: Add the following datasets:
# 30 new univariate classification problems used in the bake off [5]. Some are new,
# some are discrete versions of regression problems, some are equal length versions
Expand Down

0 comments on commit 5831a2d

Please sign in to comment.