From b87f26338dd43f20a301075f2c909edeb192a4e4 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 09:23:28 +0200 Subject: [PATCH 01/18] bump version --- fact/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fact/VERSION b/fact/VERSION index ac454c6..34a8361 100644 --- a/fact/VERSION +++ b/fact/VERSION @@ -1 +1 @@ -0.12.0 +0.12.1 From 174a70dedeb91e48f67e9ebdfce0b34fa477ca8c Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 09:24:03 +0200 Subject: [PATCH 02/18] initial commit path utils --- fact/path/__init__.py | 104 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 fact/path/__init__.py diff --git a/fact/path/__init__.py b/fact/path/__init__.py new file mode 100644 index 0000000..6d7deef --- /dev/null +++ b/fact/path/__init__.py @@ -0,0 +1,104 @@ +import os.path +import re +from collections import namedtuple + + +class TemplateToPath: + '''Turn a template like '/fac/raw/{Y}/{M}/{D}/{N}_{R}.fits.fz' into a path. + ''' + + def __init__(self, template): + self.template = template + + def __call__(self, night, run=None, **kwargs): + night = str(night) + d = dict(**kwargs) + d['N'] = night + d['Y'] = night[0:4] + d['M'] = night[4:6] + d['D'] = night[6:8] + if run is not None: + run = '{:03d}'.format(int(run)) + d['R'] = run + return self.template.format(**d) + + +def run2tree_path(base_dir, suffix, night, run=None): + '''Make a tree_path from a run + + base_dir: eg. '/fact/raw' or '/fact/aux' + suffix: eg. '.fits.fz' or '.log' or '.AUX_FOO.fits' + night: eg. 20160101 or '20160101' (int or string accepted) + run: eg. 11 or '011' or None (int, string or None accepted) + + output: + eg. 
'/fact/raw/2016/01/01/20160101_011.fits.fz' or + '/fact/raw/2016/01/01/20160101.log' + + ''' + if run is not None: + base_name = '{N}_{R}' + else: + base_name = '{N}' + + template = os.path.join( + base_dir, + '{Y}', + '{M}', + '{D}', + base_name + suffix) + return TemplateToPath(template)(night, run) + + +class TreePath: + '''Convenience class for run2tree_path() for people who don't like partials + ''' + def __init__(self, base_dir, suffix): + self.base_dir = base_dir + self.suffix = suffix + + def __call__(self, night, run=None): + return run2tree_path(self.base_dir, self.suffix, night, run) + +path_regex = re.compile( + r'(?P<prefix>.*?)' + + r'((/\d{4})(/\d{2})(/\d{2}))?/' + + r'(?P<night>\d{8})' + + r'(_?(?P<run>\d{3}))?' + + r'(?P<suffix>.*)' +) + + +def parse(path): + '''return a dict with relevant parts of the path + for input paths like these: + '/fact/raw/2016/01/01/20160101_011.fits.fz', + '/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits', + '/fact/aux/2016/01/01/20160101.log', + '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20140115_079_079.root' + + it returns dicts like these: + + {'prefix': '/fact/raw', + 'night': 20160101, + 'run': 011, + 'suffix': '.fits.fz'} + {'prefix': '/fact/aux', + 'night': 20160101, + 'run': None, + 'suffix': '.FSC_CONTROL_TEMPERATURE.fits'} + {'prefix': '/fact/aux', + 'night': 20160101, + 'run': None, + 'suffix': '.log'} + {'prefix': + '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b', + 'night': 20140115, + 'run': 079, + 'suffix': '_079.root'} + ''' + d = path_regex.match(path).groupdict() + if d['run'] is not None: + d['run'] = int(d['run']) + d['night'] = int(d['night']) + return d From f0faef45273d22ef2890dcdcad26aa274eb17353 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 09:30:36 +0200 Subject: [PATCH 03/18] add new package --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 6b1e833..810b28f 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ 'fact.factdb', 
'fact.analysis', 'fact.instrument', + 'fact.path', ], package_data={ '': [ From 4d67b095569541694401479c5f9cbff0e69f6ef4 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 10:06:05 +0200 Subject: [PATCH 04/18] add this __all__ list --- fact/path/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fact/path/__init__.py b/fact/path/__init__.py index 6d7deef..baf56d2 100644 --- a/fact/path/__init__.py +++ b/fact/path/__init__.py @@ -2,6 +2,12 @@ import re from collections import namedtuple +__all__ = [ + 'parse' + 'run2tree_path', + 'TemplateToPath', + 'TreePath', +] class TemplateToPath: '''Turn a template like '/fac/raw/{Y}/{M}/{D}/{N}_{R}.fits.fz' into a path. From 44d6813ba26bf40ad3e00ffc7249510d91b8672a Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 10:06:30 +0200 Subject: [PATCH 05/18] whitespace and remove unneeded import --- fact/path/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fact/path/__init__.py b/fact/path/__init__.py index baf56d2..91f6961 100644 --- a/fact/path/__init__.py +++ b/fact/path/__init__.py @@ -1,6 +1,5 @@ import os.path import re -from collections import namedtuple __all__ = [ 'parse' @@ -9,6 +8,7 @@ 'TreePath', ] + class TemplateToPath: '''Turn a template like '/fac/raw/{Y}/{M}/{D}/{N}_{R}.fits.fz' into a path. 
''' From 8d1f7d6bfd95084d2e29d02a67fffc025d9bd7cc Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 10:08:02 +0200 Subject: [PATCH 06/18] initial commit of path examples --- examples/path_utils.ipynb | 297 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 examples/path_utils.ipynb diff --git a/examples/path_utils.ipynb b/examples/path_utils.ipynb new file mode 100644 index 0000000..39f0d23 --- /dev/null +++ b/examples/path_utils.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## fact.path Examples\n", + "\n", + "# path deconstruction\n", + "\n", + "Sometimes one wants to iterate over a bunch of file paths and get the (night, run) integer tuple from the path. Often in order to retrieve information for each file from the RunInfo DB. \n", + "\n", + "Often the paths we get from something like:\n", + "\n", + " paths = glob('/fact/raw/*/*/*/*')\n", + " \n", + "Below I have defined a couple of example paths, which I want to deconstruct.\n", + "Note that not all of the `paths_for_parsing` contain the typical \"yyyy/mm/dd\" part.\n", + "Still the `night` and `run` are found just fine. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import fact.path\n", + "\n", + "paths_for_parsing = [\n", + " '/fact/raw/2016/01/01/20160101_011.fits.fz',\n", + " '/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits',\n", + " '/fact/aux/2016/01/01/20160101.log',\n", + " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20140115_079_079.root'\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/fact/raw/2016/01/01/20160101_011.fits.fz\n", + "{'prefix': '/fact/raw', 'night': 20160101, 'run': 11, 'suffix': '.fits.fz'}\n", + "\n", + "/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits\n", + "{'prefix': '/fact/aux', 'night': 20160101, 'run': None, 'suffix': '.FSC_CONTROL_TEMPERATURE.fits'}\n", + "\n", + "/fact/aux/2016/01/01/20160101.log\n", + "{'prefix': '/fact/aux', 'night': 20160101, 'run': None, 'suffix': '.log'}\n", + "\n", + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20140115_079_079.root\n", + "{'prefix': '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b', 'night': 20140115, 'run': 79, 'suffix': '_079.root'}\n", + "\n" + ] + } + ], + "source": [ + "for path in paths_for_parsing:\n", + " print(path)\n", + " print(fact.path.parse(path))\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The slowest run took 6.27 times longer than the fastest. 
This could mean that an intermediate result is being cached.\n", + "100000 loops, best of 3: 3.05 µs per loop\n" + ] + } + ], + "source": [ + "%timeit fact.path.parse(paths_for_parsing[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parsing is quicker than 10µs, but at the moment we have in the order of 250k runs, so parsing all paths in the raw folder might take as long as 2.5 seconds.\n", + "\n", + "However, usually `glob` is taking much longer to actually get all the paths in the first place, so speed should not be an issue.\n", + "\n", + "----\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Path construction\n", + "\n", + "Equally often, people already have runs from the RunInfo DB, and want to find the according files. Be it raw files or aux-files or other files, that happen to sit in a similar tree-like directory structure, like for example the photon-stream files.\n", + "\n", + "the typical task starts with the (night, run) tuple and wants to create a path like\n", + "\"/gpfs0/fact/processing/photon-stream/yyyy/mm/dd/night_run.phs.jsonl.gz\"\n", + "\n", + "Or similar." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from fact.path import TreePath\n", + "\n", + "night_run_tuples = [\n", + " (20160101, 1),\n", + " (20160101, 2),\n", + " (20130506, 3),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "photon_stream_path = TreePath(\n", + " base_dir='/gpfs0/fact/processing/photon-stream',\n", + " suffix='.phs.jsonl.gz'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/gpfs0/fact/processing/photon-stream/2016/01/01/20160101_001.phs.jsonl.gz\n", + "/gpfs0/fact/processing/photon-stream/2016/01/01/20160101_002.phs.jsonl.gz\n", + "/gpfs0/fact/processing/photon-stream/2013/05/06/20130506_003.phs.jsonl.gz\n" + ] + } + ], + "source": [ + "for night, run in night_run_tuples:\n", + " print(photon_stream_path(night, run))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits\n", + "/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits\n", + "/fact/aux/2013/05/06/20130506.FSC_CONTROL_TEMPERATURE.fits\n" + ] + } + ], + "source": [ + "aux_path = TreePath('/fact/aux', '.FSC_CONTROL_TEMPERATURE.fits')\n", + "for night, run in night_run_tuples:\n", + " print(aux_path(night))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "But what about more special cases? I sometime copy files from ISDC or La Palma to my machine in order to work with them locally and try something out. In the past I often did not bother to recreate the yyyy/mm/dd file structure, since I copied the files e.g. 
like this:\n", + "\n", + " scp isdc:/fact/aux/*/*/*/*.FSC_CONTROL_TEMPERATURE.fits ~/fact/aux_toy/.\n", + " \n", + "In this case I cannot make use of the `TreePath` thing, so I have to roll my own solution again?\n", + "\n", + "Nope! We have you covered. Assume you have a quite specialized path format like e.g. this:\n", + "\n", + " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20140115_079_079.root'\n", + "\n", + " * yyyy/mm/dd tree structure missing, and \n", + " * file name contains **two** not one run id.\n", + " \n", + "Just define a template for this filename, e.g. like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20160101_001_001.root\n", + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20160101_002_002.root\n", + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20130506_003_003.root\n" + ] + } + ], + "source": [ + "from fact.path import TemplateToPath\n", + "\n", + "single_pe_path = TemplateToPath(\n", + " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/{N}_{R}_{R}.root'\n", + ")\n", + "\n", + "for night, run in night_run_tuples:\n", + " print(single_pe_path(night, run))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Okay but what if the 2nd run id is not always the same as the first?\n", + "\n", + "In that case you'll have to type a bit more:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20160101_001_003.root\n", + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20160101_002_004.root\n", + "/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20130506_003_005.root\n" + ] + } + ], + "source": [ + "single_pe_path_2runs = TemplateToPath(\n", 
+ " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/{N}_{R}_{run2:03d}.root'\n", + ")\n", + "\n", + "for night, run in night_run_tuples:\n", + " print(single_pe_path_2runs(night, run, run2=run+2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7a7698f8db35fe1294c53412e7eac3d6887a96ec Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 10:09:31 +0200 Subject: [PATCH 07/18] ignore ipynb checkpoints --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2e7d6f4..ab2d61b 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,6 @@ docs/_build/ # PyBuilder target/ + +# Jupyter Notebook +.ipynb_checkpoints From 01569e1d7e5236981a4546df64ff609127a2ef86 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Thu, 24 Aug 2017 10:35:08 +0200 Subject: [PATCH 08/18] show how AuxServer might use the new fact.path.TreePath utility Note: `TreePath.__call__()` eats either ints or strings, no datetime objects yet. 
Therefore this ugly `.format` call in `read_date` --- fact/auxservices/base.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fact/auxservices/base.py b/fact/auxservices/base.py index 234c4e6..655e0aa 100644 --- a/fact/auxservices/base.py +++ b/fact/auxservices/base.py @@ -1,6 +1,6 @@ from astropy.table import Table from astropy.units import UnitsWarning -import os +from ..path import TreePath import warnings @@ -12,14 +12,9 @@ class AuxService: basename = 'AUX_SERVICE' def __init__(self, auxdir='/fact/aux'): - self.auxdir = auxdir - - @property - def filename_template(self): - return os.path.join( - self.auxdir, '{date:%Y}', '{date:%m}', '{date:%d}', - '{date:%Y%m%d}.' + self.basename + '.fits' - ) + self.path = TreePath( + base_dir=auxdir, + suffix='.' + self.basename + '.fits') @classmethod def read_file(cls, filename): @@ -49,6 +44,4 @@ def read_file(cls, filename): return df def read_date(self, date): - - filename = self.filename_template.format(date=date) - return self.read_file(filename) + return self.read_file(self.path('{:%Y%m%d}'.format(date))) From 81d53e0733127363eea587d1b29396277de6b259 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Tue, 29 Aug 2017 14:01:17 +0200 Subject: [PATCH 09/18] allow night to be also namedtuple --- fact/path/__init__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fact/path/__init__.py b/fact/path/__init__.py index 91f6961..54b23e1 100644 --- a/fact/path/__init__.py +++ b/fact/path/__init__.py @@ -17,15 +17,19 @@ def __init__(self, template): self.template = template def __call__(self, night, run=None, **kwargs): - night = str(night) d = dict(**kwargs) + + if hasattr(night, 'fNight') and hasattr(night, 'fRunID'): + night, run = night.fNight, night.fRunID + + night = str(night) + if run is not None: + d['R'] = '{:03d}'.format(int(run)) + d['N'] = night d['Y'] = night[0:4] d['M'] = night[4:6] d['D'] = night[6:8] - if run is not None: - run = 
'{:03d}'.format(int(run)) - d['R'] = run return self.template.format(**d) From d72db3ab98404df796e54eda3055d225a6512e8f Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Tue, 29 Aug 2017 18:41:56 +0200 Subject: [PATCH 10/18] remove stupid classes --- fact/path/__init__.py | 47 ++++++++++++------------------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/fact/path/__init__.py b/fact/path/__init__.py index 54b23e1..5147912 100644 --- a/fact/path/__init__.py +++ b/fact/path/__init__.py @@ -3,37 +3,26 @@ __all__ = [ 'parse' - 'run2tree_path', - 'TemplateToPath', - 'TreePath', + 'template_to_path', + 'tree_path', ] -class TemplateToPath: +def template_to_path(template, night, run=None, **kwargs): '''Turn a template like '/fac/raw/{Y}/{M}/{D}/{N}_{R}.fits.fz' into a path. ''' + night = str(night) + if run is not None: + kwargs['R'] = '{:03d}'.format(int(run)) - def __init__(self, template): - self.template = template - - def __call__(self, night, run=None, **kwargs): - d = dict(**kwargs) - - if hasattr(night, 'fNight') and hasattr(night, 'fRunID'): - night, run = night.fNight, night.fRunID - - night = str(night) - if run is not None: - d['R'] = '{:03d}'.format(int(run)) - - d['N'] = night - d['Y'] = night[0:4] - d['M'] = night[4:6] - d['D'] = night[6:8] - return self.template.format(**d) + kwargs['N'] = night + kwargs['Y'] = night[0:4] + kwargs['M'] = night[4:6] + kwargs['D'] = night[6:8] + return template.format(**kwargs) -def run2tree_path(base_dir, suffix, night, run=None): +def tree_path(base_dir, suffix, night, run=None): '''Make a tree_path from a run base_dir: eg. 
'/fact/raw' or '/fact/aux' @@ -57,18 +46,8 @@ def run2tree_path(base_dir, suffix, night, run=None): '{M}', '{D}', base_name + suffix) - return TemplateToPath(template)(night, run) - - -class TreePath: - '''Convenience class for run2tree_path() for people who don't like partials - ''' - def __init__(self, base_dir, suffix): - self.base_dir = base_dir - self.suffix = suffix + return template_to_path(template, night, run=None) - def __call__(self, night, run=None): - return run2tree_path(self.base_dir, self.suffix, night, run) path_regex = re.compile( r'(?P<prefix>.*?)' + From 5f4123ae0648d88173e0cf581162d34a6aedbdf7 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Tue, 29 Aug 2017 18:51:19 +0200 Subject: [PATCH 11/18] use the new path interface --- fact/auxservices/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fact/auxservices/base.py b/fact/auxservices/base.py index 655e0aa..f360c61 100644 --- a/fact/auxservices/base.py +++ b/fact/auxservices/base.py @@ -1,6 +1,7 @@ from astropy.table import Table from astropy.units import UnitsWarning -from ..path import TreePath +from ..path import tree_path +from functools import partial import warnings @@ -12,7 +13,8 @@ class AuxService: basename = 'AUX_SERVICE' def __init__(self, auxdir='/fact/aux'): - self.path = TreePath( + self.path = partial( + tree_path, base_dir=auxdir, suffix='.' + self.basename + '.fits') From 2f9b0a29f1af0c5818d77af78ee64451e03d9c0d Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 10:01:12 +0200 Subject: [PATCH 12/18] update __doc__s --- fact/path/__init__.py | 69 +++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/fact/path/__init__.py b/fact/path/__init__.py index 5147912..e2a9bfd 100644 --- a/fact/path/__init__.py +++ b/fact/path/__init__.py @@ -9,7 +9,23 @@ def template_to_path(template, night, run=None, **kwargs): - '''Turn a template like '/fac/raw/{Y}/{M}/{D}/{N}_{R}.fits.fz' into a path. 
+ '''Make path from template and (night, run) using kwargs existing. + + template: string + e.g. "/foo/bar/{Y}/baz/{R}_{M}_{D}.gz.{N}" + night: int or string + e.g. night = 20160102 (int) + is used to create Y,M,D,N template values as: + Y = "2016" + M = "01" + D = "02" + N = "20160102" + run: int or string + e.g. run = 1 or run = "000000001" + is used to create template value R = "001" + kwargs: + if template contains other place holders than Y,M,D,N,R + kwargs are used to format these. ''' night = str(night) if run is not None: @@ -22,18 +38,17 @@ def template_to_path(template, night, run=None, **kwargs): return template.format(**kwargs) -def tree_path(base_dir, suffix, night, run=None): - '''Make a tree_path from a run - - base_dir: eg. '/fact/raw' or '/fact/aux' - suffix: eg. '.fits.fz' or '.log' or '.AUX_FOO.fits' - night: eg. 20160101 or '20160101' (int or string accepted) - run: eg. 11 or '011' or None (int, string or None accepted) - - output: - eg. '/fact/raw/2016/01/01/20160101_011.fits.fz' or - '/fact/raw/2016/01/01/20160101.log' +def tree_path(prefix, suffix, night, run=None): + '''Make a tree_path from a (night, run) for given prefix, suffix + prefix: string + eg. '/fact/raw' or '/fact/aux' + suffix: string + eg. '.fits.fz' or '.log' or '.AUX_FOO.fits' + night: int or string + eg. 20160101 or '20160101' + run: int or string + eg. 
11 or '011' or None (int, string or None accepted) ''' if run is not None: base_name = '{N}_{R}' @@ -41,7 +56,7 @@ def tree_path(base_dir, suffix, night, run=None): base_name = '{N}' template = os.path.join( - base_dir, + prefix, '{Y}', '{M}', '{D}', @@ -59,32 +74,10 @@ def tree_path(base_dir, suffix, night, run=None): def parse(path): - '''return a dict with relevant parts of the path - for input paths like these: - '/fact/raw/2016/01/01/20160101_011.fits.fz', - '/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits', - '/fact/aux/2016/01/01/20160101.log', - '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20140115_079_079.root' - - it returns dicts like these: + '''Return a dict with {prefix, suffix, night, run} parsed from path. - {'prefix': '/fact/raw', - 'night': 20160101, - 'run': 011, - 'suffix': '.fits.fz'} - {'prefix': '/fact/aux', - 'night': 20160101, - 'run': None, - 'suffix': '.FSC_CONTROL_TEMPERATURE.fits'} - {'prefix': '/fact/aux', - 'night': 20160101, - 'run': None, - 'suffix': '.log'} - {'prefix': - '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b', - 'night': 20140115, - 'run': 079, - 'suffix': '_079.root'} + path: string + any (absolute) path should be fine. 
''' d = path_regex.match(path).groupdict() if d['run'] is not None: From b5a02856183266ef86b751fcc7779f2cd47b6204 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 10:02:39 +0200 Subject: [PATCH 13/18] flat is better than nested --- fact/{path/__init__.py => path.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename fact/{path/__init__.py => path.py} (100%) diff --git a/fact/path/__init__.py b/fact/path.py similarity index 100% rename from fact/path/__init__.py rename to fact/path.py From 4b20c6f7e440ad2dd502b1b47225da53a4fe9e90 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 11:22:36 +0200 Subject: [PATCH 14/18] remove nonexistend package --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 810b28f..6b1e833 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,6 @@ 'fact.factdb', 'fact.analysis', 'fact.instrument', - 'fact.path', ], package_data={ '': [ From 0e6e1a96dee7c61012851098306b58b291d62249 Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 11:29:56 +0200 Subject: [PATCH 15/18] reorder arguments, to make using partial easier --- fact/path.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fact/path.py b/fact/path.py index e2a9bfd..ba6e2f5 100644 --- a/fact/path.py +++ b/fact/path.py @@ -8,11 +8,9 @@ ] -def template_to_path(template, night, run=None, **kwargs): +def template_to_path(night, run, template, **kwargs): '''Make path from template and (night, run) using kwargs existing. - template: string - e.g. "/foo/bar/{Y}/baz/{R}_{M}_{D}.gz.{N}" night: int or string e.g. night = 20160102 (int) is used to create Y,M,D,N template values as: @@ -23,6 +21,8 @@ def template_to_path(template, night, run=None, **kwargs): run: int or string e.g. run = 1 or run = "000000001" is used to create template value R = "001" + template: string + e.g. 
"/foo/bar/{Y}/baz/{R}_{M}_{D}.gz.{N}" kwargs: if template contains other place holders than Y,M,D,N,R kwargs are used to format these. @@ -38,17 +38,17 @@ def template_to_path(template, night, run=None, **kwargs): return template.format(**kwargs) -def tree_path(prefix, suffix, night, run=None): +def tree_path(night, run, prefix, suffix): '''Make a tree_path from a (night, run) for given prefix, suffix - prefix: string - eg. '/fact/raw' or '/fact/aux' - suffix: string - eg. '.fits.fz' or '.log' or '.AUX_FOO.fits' night: int or string eg. 20160101 or '20160101' run: int or string eg. 11 or '011' or None (int, string or None accepted) + prefix: string + eg. '/fact/raw' or '/fact/aux' + suffix: string + eg. '.fits.fz' or '.log' or '.AUX_FOO.fits' ''' if run is not None: base_name = '{N}_{R}' @@ -61,7 +61,7 @@ def tree_path(prefix, suffix, night, run=None): '{M}', '{D}', base_name + suffix) - return template_to_path(template, night, run=None) + return template_to_path(night, run, template) path_regex = re.compile( From 89adcea872ae18ca6590fc0e5f6ccb9495672aaf Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 11:36:12 +0200 Subject: [PATCH 16/18] adjust example to class-less path interface --- examples/path_utils.ipynb | 172 +++++++++++++++++++++++++++----------- 1 file changed, 124 insertions(+), 48 deletions(-) diff --git a/examples/path_utils.ipynb b/examples/path_utils.ipynb index 39f0d23..d9aea99 100644 --- a/examples/path_utils.ipynb +++ b/examples/path_utils.ipynb @@ -22,24 +22,47 @@ { "cell_type": "code", "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function parse in module fact.path:\n", + "\n", + "parse(path)\n", + " Return a dict with {prefix, suffix, night, run} parsed from path.\n", + " \n", + " path: string\n", + " any (absolute) path should be fine.\n", + "\n" + ] + } + ], + "source": [ + "from fact.path import parse\n", + "help(parse)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "import fact.path\n", - "\n", "paths_for_parsing = [\n", " '/fact/raw/2016/01/01/20160101_011.fits.fz',\n", " '/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits',\n", " '/fact/aux/2016/01/01/20160101.log',\n", " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/20140115_079_079.root'\n", - "]\n" + "]" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -64,26 +87,26 @@ "source": [ "for path in paths_for_parsing:\n", " print(path)\n", - " print(fact.path.parse(path))\n", + " print(parse(path))\n", " print()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The slowest run took 6.27 times longer than the fastest. This could mean that an intermediate result is being cached.\n", - "100000 loops, best of 3: 3.05 µs per loop\n" + "The slowest run took 4.25 times longer than the fastest. This could mean that an intermediate result is being cached.\n", + "100000 loops, best of 3: 3.08 µs per loop\n" ] } ], "source": [ - "%timeit fact.path.parse(paths_for_parsing[0])" + "%timeit parse(paths_for_parsing[0])" ] }, { @@ -113,13 +136,42 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function tree_path in module fact.path:\n", + "\n", + "tree_path(night, run, prefix, suffix)\n", + " Make a tree_path from a (night, run) for given prefix, suffix\n", + " \n", + " night: int or string\n", + " eg. 20160101 or '20160101'\n", + " run: int or string\n", + " eg. 11 or '011' or None (int, string or None accepted)\n", + " prefix: string\n", + " eg. '/fact/raw' or '/fact/aux'\n", + " suffix: string\n", + " eg. 
'.fits.fz' or '.log' or '.AUX_FOO.fits'\n", + "\n" + ] + } + ], + "source": [ + "from fact.path import tree_path\n", + "help(tree_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, "outputs": [], "source": [ - "from fact.path import TreePath\n", + "from functools import partial\n", "\n", "night_run_tuples = [\n", " (20160101, 1),\n", @@ -130,21 +182,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "photon_stream_path = TreePath(\n", - " base_dir='/gpfs0/fact/processing/photon-stream',\n", - " suffix='.phs.jsonl.gz'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -158,13 +196,17 @@ } ], "source": [ + "photon_stream_path = partial(tree_path,\n", + " prefix='/gpfs0/fact/processing/photon-stream',\n", + " suffix='.phs.jsonl.gz'\n", + ")\n", "for night, run in night_run_tuples:\n", " print(photon_stream_path(night, run))" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -178,7 +220,12 @@ } ], "source": [ - "aux_path = TreePath('/fact/aux', '.FSC_CONTROL_TEMPERATURE.fits')\n", + "aux_path = partial(\n", + " tree_path,\n", + " prefix='/fact/aux',\n", + " suffix='.FSC_CONTROL_TEMPERATURE.fits',\n", + " run=None\n", + ")\n", "for night, run in night_run_tuples:\n", " print(aux_path(night))" ] @@ -206,7 +253,45 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function template_to_path in module fact.path:\n", + "\n", + "template_to_path(night, run, template, **kwargs)\n", + " Make path from template and (night, run) using kwargs existing.\n", + " \n", + " night: int or string\n", + " e.g. 
night = 20160102 (int)\n", + " is used to create Y,M,D,N template values as:\n", + " Y = \"2016\"\n", + " M = \"01\"\n", + " D = \"02\"\n", + " N = \"20160102\"\n", + " run: int or string\n", + " e.g. run = 1 or run = \"000000001\"\n", + " is used to create template value R = \"001\"\n", + " template: string\n", + " e.g. \"/foo/bar/{Y}/baz/{R}_{M}_{D}.gz.{N}\"\n", + " kwargs:\n", + " if template contains other place holders than Y,M,D,N,R\n", + " kwargs are used to format these.\n", + "\n" + ] + } + ], + "source": [ + "from fact.path import template_to_path\n", + "help(template_to_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -220,10 +305,9 @@ } ], "source": [ - "from fact.path import TemplateToPath\n", - "\n", - "single_pe_path = TemplateToPath(\n", - " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/{N}_{R}_{R}.root'\n", + "single_pe_path = partial(\n", + " template_to_path,\n", + " template='/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/{N}_{R}_{R}.root'\n", ")\n", "\n", "for night, run in night_run_tuples:\n", @@ -241,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -255,22 +339,14 @@ } ], "source": [ - "single_pe_path_2runs = TemplateToPath(\n", - " '/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/{N}_{R}_{run2:03d}.root'\n", + "single_pe_path_2runs = partial(\n", + " template_to_path,\n", + " template='/home/guest/tbretz/gainanalysis.20130725/files/fit_bt2b/{N}_{R}_{run2:03d}.root'\n", ")\n", "\n", "for night, run in night_run_tuples:\n", " print(single_pe_path_2runs(night, run, run2=run+2))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { From 9f30beba62d5e05dc572389d06a012b789ed5d6a Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 11:49:25 +0200 Subject: [PATCH 17/18] add tests --- 
tests/test_path.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tests/test_path.py diff --git a/tests/test_path.py b/tests/test_path.py new file mode 100644 index 0000000..35574a6 --- /dev/null +++ b/tests/test_path.py @@ -0,0 +1,81 @@ +from fact.path import parse, template_to_path, tree_path +from functools import partial + + +def test_parse(): + + input_paths = [ + '/fact/raw/2016/01/01/20160101_011.fits.fz', + '/fact/aux/2016/01/01/20160101.FSC_CONTROL_TEMPERATURE.fits', + '/fact/aux/2016/01/01/20160101.log', + '/hackypateng/20140115_079_079.root' + ] + + result_dicts = [ + {'prefix': '/fact/raw', + 'night': 20160101, + 'run': 11, + 'suffix': '.fits.fz'}, + {'prefix': '/fact/aux', + 'night': 20160101, + 'run': None, + 'suffix': '.FSC_CONTROL_TEMPERATURE.fits'}, + {'prefix': '/fact/aux', + 'night': 20160101, + 'run': None, + 'suffix': '.log'}, + {'prefix': + '/hackypateng', + 'night': 20140115, + 'run': 79, + 'suffix': '_079.root'}, + ] + + for path, expected in zip(input_paths, result_dicts): + parsed = parse(path) + assert parsed == expected + + +def test_tree_path(): + + night_run_tuples = [ + (20160101, 1), + (20160101, 2), + (20130506, 3), + ] + + result_paths = [ + '/bar/2016/01/01/20160101_001.phs.jsonl.gz', + '/bar/2016/01/01/20160101_002.phs.jsonl.gz', + '/bar/2013/05/06/20130506_003.phs.jsonl.gz', + ] + + photon_stream_path = partial( + tree_path, + prefix='/bar', + suffix='.phs.jsonl.gz' + ) + for night_run, result in zip(night_run_tuples, result_paths): + assert result == photon_stream_path(*night_run) + + +def test_template_to_path(): + night_run_tuples = [ + (20160101, 1), + (20160101, 2), + (20130506, 3), + ] + + single_pe_path_2runs = partial( + template_to_path, + template='/foo/{N}_{R}_{run2:03d}.root' + ) + + result_paths = [ + '/foo/20160101_001_003.root', + '/foo/20160101_002_004.root', + '/foo/20130506_003_005.root', + ] + + for night_run, result in zip(night_run_tuples, 
result_paths): + assert result == single_pe_path_2runs(*night_run) From 73738e7d2a90688988120fc2519206d2c8df586d Mon Sep 17 00:00:00 2001 From: Dominik Neise Date: Wed, 30 Aug 2017 11:50:16 +0200 Subject: [PATCH 18/18] fix failing test --- tests/test_path.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_path.py b/tests/test_path.py index 35574a6..42270ed 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -78,4 +78,6 @@ def test_template_to_path(): ] for night_run, result in zip(night_run_tuples, result_paths): - assert result == single_pe_path_2runs(*night_run) + assert result == single_pe_path_2runs( + *night_run, + run2=night_run[1]+2)