Skip to content

Commit

Permalink
Merge pull request #46 from fact-project/fix_nans
Browse files Browse the repository at this point in the history
Do not add runs with nans to the erna database
  • Loading branch information
maxnoe authored Oct 23, 2017
2 parents 7155096 + ccff5cc commit 234da34
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
12 changes: 9 additions & 3 deletions erna/automatic_processing/database_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from tqdm import tqdm

from .database import(
from .database import (
RawDataFile, DrsFile, Job,
ProcessingState, Jar, XML,
requires_database_connection
Expand Down Expand Up @@ -34,7 +34,6 @@ def fill_data_runs(df, database):
},
inplace=True,
)
df.drop('fDrsStep', axis=1, inplace=True)
with database.atomic():
query = (
RawDataFile
Expand Down Expand Up @@ -182,11 +181,18 @@ def insert_new_job(
@requires_database_connection
def insert_new_jobs(raw_data_files, jar, xml, queue, progress=True, **kwargs):

failed_files = []
for f in tqdm(raw_data_files, total=raw_data_files.count(), disable=not progress):
try:
insert_new_job(f, jar=jar, xml=xml, queue=queue, **kwargs)
except peewee.IntegrityError:
log.warning('Job already submitted: {} {}'.format(f.night, f.run_id))
log.warning('Job already submitted: {}_{:03d}'.format(f.night, f.run_id))
except ValueError as e:
log.warning('Could not submit {}_{:03d}: {}'.format(
f.night, f.run_id, e,
))
failed_files.append(f)
return failed_files


@requires_database_connection
Expand Down
18 changes: 16 additions & 2 deletions erna/scripts/fill_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,23 @@ def main(start, end, config):
runs['fNight'] = pd.to_datetime(runs.fNight.astype(str), format='%Y%m%d')

# fill all non drs runs into raw_data_files
fill_data_runs(runs.query('fDrsStep != 2'), database=database)
data_runs = runs.query('fDrsStep != 2').drop('fDrsStep', axis=1)
nan_entries = data_runs.isnull().any(axis=1)
if len(data_runs[nan_entries]) != 0:
print('Found invalid entries, skipping:')
print(data_runs[nan_entries])
data_runs.dropna(inplace=True)

fill_data_runs(data_runs, database=database)

# fill all drs runs into drs_files
fill_drs_runs(runs.query('(fRunTypeKey == 2) & (fDrsStep == 2)'), database=database)
drs_runs = runs.query('(fRunTypeKey == 2) & (fDrsStep == 2)')
nan_entries = drs_runs.isnull().any(axis=1)
if len(drs_runs[nan_entries]) != 0:
print('Found invalid entries, skipping:')
print(drs_runs[nan_entries])
drs_runs.dropna(inplace=True)
fill_drs_runs(drs_runs, database=database)

database.close()

Expand Down

0 comments on commit 234da34

Please sign in to comment.