diff --git a/erna/automatic_processing/database_utils.py b/erna/automatic_processing/database_utils.py index 79d65db..1f9bcc2 100644 --- a/erna/automatic_processing/database_utils.py +++ b/erna/automatic_processing/database_utils.py @@ -3,7 +3,7 @@ import os from tqdm import tqdm -from .database import( +from .database import ( RawDataFile, DrsFile, Job, ProcessingState, Jar, XML, requires_database_connection @@ -34,7 +34,6 @@ def fill_data_runs(df, database): }, inplace=True, ) - df.drop('fDrsStep', axis=1, inplace=True) with database.atomic(): query = ( RawDataFile @@ -182,11 +181,18 @@ def insert_new_job( @requires_database_connection def insert_new_jobs(raw_data_files, jar, xml, queue, progress=True, **kwargs): + failed_files = [] for f in tqdm(raw_data_files, total=raw_data_files.count(), disable=not progress): try: insert_new_job(f, jar=jar, xml=xml, queue=queue, **kwargs) except peewee.IntegrityError: - log.warning('Job already submitted: {} {}'.format(f.night, f.run_id)) + log.warning('Job already submitted: {}_{:03d}'.format(f.night, f.run_id)) + except ValueError as e: + log.warning('Could not submit {}_{:03d}: {}'.format( + f.night, f.run_id, e, + )) + failed_files.append(f) + return failed_files @requires_database_connection diff --git a/erna/scripts/fill_database.py b/erna/scripts/fill_database.py index 94c1853..8228146 100644 --- a/erna/scripts/fill_database.py +++ b/erna/scripts/fill_database.py @@ -56,9 +56,23 @@ def main(start, end, config): runs['fNight'] = pd.to_datetime(runs.fNight.astype(str), format='%Y%m%d') # fill all non drs runs into raw_data_files - fill_data_runs(runs.query('fDrsStep != 2'), database=database) + data_runs = runs.query('fDrsStep != 2').drop('fDrsStep', axis=1) + nan_entries = data_runs.isnull().any(axis=1) + if len(data_runs[nan_entries]) != 0: + print('Found invalid entries, skipping:') + print(data_runs[nan_entries]) + data_runs.dropna(inplace=True) + + fill_data_runs(data_runs, database=database) + # fill all drs runs into drs_files - fill_drs_runs(runs.query('(fRunTypeKey == 2) & (fDrsStep == 2)'), database=database) + drs_runs = runs.query('(fRunTypeKey == 2) & (fDrsStep == 2)') + nan_entries = drs_runs.isnull().any(axis=1) + if len(drs_runs[nan_entries]) != 0: + print('Found invalid entries, skipping:') + print(drs_runs[nan_entries]) + drs_runs.dropna(inplace=True) + fill_drs_runs(drs_runs, database=database) database.close()