Merge pull request #34 from fact-project/convert_theta

Rename columns and convert theta
fact-project · May 19, 2017 · 0fcf917 · 0fcf917
2 parents 4629970 + d10c2d7
commit 0fcf917
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 31 deletions.
diff --git a/erna/datacheck.py b/erna/datacheck.py
@@ -1,13 +1,12 @@
-from .utils import load_config
 import pandas as pd
 
-
 default_columns = (
     'fNight AS night',
     'fRunID AS run_id',
     'fSourceName AS source',
     'TIMESTAMPDIFF(SECOND, fRunStart, fRunStop) * fEffectiveOn AS ontime',
     'fZenithDistanceMean AS zenith',
+    'fAzimuthMean AS azimuth',
     'fRunStart AS run_start',
     'fRunStop AS run_stop',
 )

diff --git a/erna/hdf_utils.py b/erna/hdf_utils.py
@@ -4,12 +4,43 @@
 from tqdm import tqdm
 import sys
 from fact.io import append_to_h5py, initialize_h5py
+from fact.instrument import camera_distance_mm_to_deg
+import re
+from numpy.lib import recfunctions
+import numpy as np
 
 log = logging.getLogger(__name__)
 
 native_byteorder = {'little': '<', 'big': '>'}[sys.byteorder]
 
 
+theta_columns = tuple(  # 'theta', 'theta_off_1' ... 'theta_off_5'
+    ['theta'] + ['theta_off_{}'.format(i) for i in range(1, 6)]
+)
+
+theta_deg_columns = tuple(  # same order as theta_columns, with a '_deg' infix
+    ['theta_deg'] + ['theta_deg_off_{}'.format(i) for i in range(1, 6)]
+)
+
+snake_re_1 = re.compile('(.)([A-Z][a-z]+)')  # any char, then Upper + lowercase run
+snake_re_2 = re.compile('([a-z0-9])([A-Z])')  # lowercase/digit directly before Upper
+
+
+renames = {'RUNID': 'run_id', 'COGx': 'cog_x', 'COGy': 'cog_y'}  # looked up before camel2snake
+
+
+def camel2snake(key):
+    ''' camelCase -> snake_case; see http://stackoverflow.com/a/1176023/3838691 '''
+    s1 = snake_re_1.sub(r'\1_\2', key)  # insert '_' before each Capitalized run
+    s2 = snake_re_2.sub(r'\1_\2', s1).lower().replace('__', '_')
+    s3 = re.sub('^m_', '', s2)  # drop a leading 'm_' prefix
+    return s3.replace('.f_', '_')  # a '.f_' separator becomes a plain '_'
+
+
+def rename_columns(columns):  # `renames` lookup first, then camel2snake; returns a list
+    return [camel2snake(renames.get(col, col)) for col in columns]
+
+
 def write_fits_to_hdf5(
         outputfile,
         inputfiles,
@@ -28,13 +59,27 @@ def write_fits_to_hdf5(
                 if len(f) < 2:
                     continue
 
+                array = np.array(f[1].data[:])
+
+                # convert all names to snake case
+                array.dtype.names = rename_columns(array.dtype.names)
+
+                # add theta in degrees; append_fields returns a new array, so rebind
+                for in_col, out_col in zip(theta_columns, theta_deg_columns):
+                    if in_col in array.dtype.names:
+                        array = recfunctions.append_fields(
+                            array, out_col,
+                            camera_distance_mm_to_deg(array[in_col]),
+                            usemask=False,  # plain ndarray, not a MaskedArray
+                        )
+
                 if not initialized:
                     initialize_h5py(
                         hdf_file,
-                        f[1].data.dtype,
+                        array.dtype,
                         key=key,
                         compression=compression,
                     )
                     initialized = True
 
-                append_to_h5py(hdf_file, f[1].data, key=key)
+                append_to_h5py(hdf_file, array, key=key)
diff --git a/erna/scripts/gather_fits.py b/erna/scripts/gather_fits.py
@@ -5,7 +5,7 @@
 import sys
 import os
 import numpy as np
-from fact.io import append_to_h5py, initialize_h5py
+from fact.io import to_h5py
 
 from ..automatic_processing.database import (
     database, Job, RawDataFile, Jar, XML, ProcessingState
@@ -113,36 +113,24 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck
         len(jobs), jobs.ontime.sum()/3600
     ))
 
-    runs_array = np.core.rec.fromarrays(
-        [
-            successful_jobs['night'],
-            successful_jobs['run_id'],
-            successful_jobs['source'].values.astype('S'),
-            successful_jobs['ontime'],
-            successful_jobs['zenith'],
-            successful_jobs['run_start'].values.astype('S'),
-            successful_jobs['run_stop'].values.astype('S'),
-        ],
-        names=(
-            'night',
-            'run_id',
-            'source',
-            'ontime',
-            'zenith',
-            'run_start',
-            'run_stop',
-        )
-    )
-
     if os.path.isfile(outputfile):
         a = input('Outputfile exists! Overwrite? [y, N]: ')
         if not a.lower().startswith('y'):
             sys.exit()
 
-    with h5py.File(outputfile, 'w') as f:
-        initialize_h5py(f, dtypes=runs_array.dtype, key='runs')
-        append_to_h5py(f, runs_array, key='runs')
+    columns = [
+        'night',
+        'run_id',
+        'source',
+        'ontime',
+        'zenith',
+        'azimuth',
+        'run_start',
+        'run_stop',
+    ]
+    to_h5py(outputfile, successful_jobs[columns], key='runs', mode='w')
 
+    with h5py.File(outputfile, 'a') as f:
         f['runs'].attrs['datacheck'] = ' AND '.join(conditions)
 
     write_fits_to_hdf5(outputfile, successful_jobs.result_file, mode='a')
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='erna',
-    version='0.3.0',
+    version='0.4.0',
     description='Easy RuN Access. Tools that help to do batch processing of FACT data',
     url='https://github.com/fact-project/erna',
     author='Kai Brügge, Jens Buss, Maximilian Nöthe',
@@ -26,7 +26,7 @@
         'PyMySQL',          # in anaconda
         'pytz',             # in anaconda
         'tables',           # needs to be installed by pip for some reason
-        'pyfact>=0.9.4',
+        'pyfact>=0.10.5',
         'astropy',
         'h5py',
         # 'hdf5',