From baf798cc302752bd0544dd1b4217842cfdb483e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 10 May 2017 16:07:01 +0200 Subject: [PATCH 1/4] Use datacheck file --- erna/scripts/gather_fits.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/erna/scripts/gather_fits.py b/erna/scripts/gather_fits.py index 3bd4750..bc73bd1 100644 --- a/erna/scripts/gather_fits.py +++ b/erna/scripts/gather_fits.py @@ -95,7 +95,11 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck 'fSourceName = "{}"'.format(source), ] if datacheck is not None: - conditions.extend(datacheck_conditions[datacheck]) + if os.path.isfile(datacheck): + with open(datacheck, 'r') as f: + conditions.extend(f.read().splitlines()) + else: + conditions.extend(datacheck_conditions[datacheck]) runs = get_runs(fact_db, conditions=conditions).set_index(['night', 'run_id']) jobs = jobs.join(runs, on=['night', 'run_id'], how='inner') From 99e630ecd5ac740dfe3039509d621aa778b02449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 10 May 2017 16:34:43 +0200 Subject: [PATCH 2/4] Fix datacheck fi --- erna/scripts/gather_fits.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/erna/scripts/gather_fits.py b/erna/scripts/gather_fits.py index bc73bd1..52a7544 100644 --- a/erna/scripts/gather_fits.py +++ b/erna/scripts/gather_fits.py @@ -46,11 +46,12 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck database.init(**config['processing_database']) database.connect() - if datacheck is not None and datacheck not in datacheck_conditions: - print('Conditions must be any of: ') - for key in datacheck_conditions: - print(key) - sys.exit(1) + if datacheck is not None: + if not (datacheck in datacheck_conditions or os.path.isfile(datacheck)): + print('Conditions must be a file or any of: ') + for key in datacheck_conditions: + print(key) + sys.exit(1) processing_db = create_mysql_engine(**config['processing_database']) fact_db = create_mysql_engine(**config['fact_database']) From 606f562057c9c8beac844d87ad51291f21f69e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 10 May 2017 16:35:06 +0200 Subject: [PATCH 3/4] List available xmls and ft versions if wrong --- erna/scripts/gather_fits.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/erna/scripts/gather_fits.py b/erna/scripts/gather_fits.py index 52a7544..b8dd3f3 100644 --- a/erna/scripts/gather_fits.py +++ b/erna/scripts/gather_fits.py @@ -56,14 +56,26 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck processing_db = create_mysql_engine(**config['processing_database']) fact_db = create_mysql_engine(**config['fact_database']) - jar = ( - Jar - .select(Jar.id, Jar.version) - .where(Jar.version == ft_version) - .get() - ) - - xml = XML.get(jar=jar, name=xml_name) + try: + jar = ( + Jar + .select(Jar.id, Jar.version) + .where(Jar.version == ft_version) + .get() + ) + except Jar.DoesNotExist: + print('FACT-Tools version not found, avaliable jars are') + for jar in Jar.select(Jar.version): + print(jar.version) + sys.exit(1) + + try: + xml = XML.get(jar=jar, name=xml_name) + except XML.DoesNotExist: + print('XML not found, avaliable xmls are:') + for xml in XML.select(XML.name).join(Jar).where(Jar.version == ft_version): + print(xml.name) + sys.exit(1) job_query = ( Job From b3e28d533e22a066911543cc14ffa7fef85fe314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 10 May 2017 22:28:44 +0200 Subject: [PATCH 4/4] Add runlist option to gather fits --- erna/scripts/gather_fits.py | 46 ++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/erna/scripts/gather_fits.py b/erna/scripts/gather_fits.py index b8dd3f3..ec13cce 100644 --- a/erna/scripts/gather_fits.py +++ b/erna/scripts/gather_fits.py @@ -28,8 +28,9 @@ @click.option('--end', '-e', help='Last night to get data from') @click.option('--source', default='Crab') @click.option('--datacheck', help='The name of a condition set for the datacheck') +@click.option('--runlist', help='A csv file with columns night, run_id, the runs to get') @click.option('-r', '--run-type', default='data', help='The runtype to consider') -def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck, run_type): +def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck, runlist, run_type): ''' Gather the fits outputfiles of the erna automatic processing into a hdf5 file. The hdf5 file is written using h5py and contains the level 2 features in the @@ -46,6 +47,10 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck database.init(**config['processing_database']) database.connect() + if datacheck and runlist: + print('Only one of datacheck or runlist allowed') + sys.exit(1) + if datacheck is not None: if not (datacheck in datacheck_conditions or os.path.isfile(datacheck)): print('Conditions must be a file or any of: ') @@ -102,11 +107,19 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck sql, params = job_query.sql() jobs = pd.read_sql_query(sql, processing_db, params=params) - conditions = [ - 'fNight <= {}'.format(jobs.night.max()), - 'fNight >= {}'.format(jobs.night.min()), - 'fSourceName = "{}"'.format(source), - ] + if runlist is None: + conditions = [ + 'fNight <= {}'.format(jobs.night.max()), + 'fNight >= {}'.format(jobs.night.min()), + 'fSourceName = "{}"'.format(source), + ] + else: + wanted_runs = pd.read_csv(runlist) + conditions = [ + 'fNight <= {}'.format(wanted_runs.night.max()), + 'fNight >= {}'.format(wanted_runs.night.min()), + ] + if datacheck is not None: if os.path.isfile(datacheck): with open(datacheck, 'r') as f: @@ -116,13 +129,30 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck runs = get_runs(fact_db, conditions=conditions).set_index(['night', 'run_id']) jobs = jobs.join(runs, on=['night', 'run_id'], how='inner') - successful_jobs = jobs.query('status == "success"') + if runlist is not None: + jobs = wanted_runs.join( + jobs.set_index(['night', 'run_id']), + on=['night', 'run_id'], + how='inner', + ) + + successful_jobs = jobs.query('status == "success"') total = len(jobs) successful = len(successful_jobs) + + if runlist is not None: + if len(wanted_runs) != successful: + click.confirm( + 'Only {} of {} runs available, continue?:'.format( + total, len(wanted_runs) + ), + abort=True, + ) + if total != successful: click.confirm( - 'Only {} of {} jobs finished, continue? [y, N] :'.format(successful, total), + 'Only {} of {} jobs finished, continue?'.format(successful, total), abort=True, )