From ccddc2a1d28762ed811c7043a0da92fe929b080b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 21 Jun 2016 21:02:39 +0200 Subject: [PATCH] outputdir is a standard parameter on StandardWorkflowComponent as well as Task --- README.rst | 15 ++++++--------- luiginlp/engine.py | 1 + luiginlp/modules/ocr.py | 2 -- luiginlp/modules/ucto.py | 4 ---- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index f4bed2c..9262787 100644 --- a/README.rst +++ b/README.rst @@ -376,8 +376,9 @@ everything. #always return the last task(s) return ucto +------------------------------------ Dynamic dependencies aka Inception ------------------------------------ +------------------------------------ Workflows are static in the sense that based on the format of the input file and all given parameters, all workflow components and tasks are assembled @@ -454,14 +455,10 @@ won't result in parallelisation, you must really yield an entire list (or tuple). Note that we added an ``outputdir`` parameter to the Ucto component which we hadn't implemented yet. This is necessary to ensure all individual output files end up in the directory that groups our output. The Ucto component should -simply pass this parameter on to the ``Ucto_txt2tok`` task, and there we -just add it as an optional parameter as follows. The ``outputfrominput()`` -method automatically supports this parameter. - -.. code-block:: python - - class Ucto_txt2tok(Task): - outputdir = Parameter(default="") +simply pass this parameter on to the ``Ucto_txt2tok`` task. The ``outputdir`` +parameter is implicitly present on all tasks as well as on +``StandardWorkflowComponent``; the ``outputfrominput()`` method automatically +supports this parameter. 
Assuming you have a collection of text files in a directory ``corpus.txtdir/``, you can now invoke LuigiNLP as follows and end up with a ``corpus.tokdir/`` diff --git a/luiginlp/engine.py b/luiginlp/engine.py index fcab8e9..551f1b7 100644 --- a/luiginlp/engine.py +++ b/luiginlp/engine.py @@ -328,6 +328,7 @@ class StandardWorkflowComponent(WorkflowComponent): """A workflow component that takes one inputfile""" inputfile = luigi.Parameter() + outputdir = luigi.Parameter(default="") class TargetInfo(sciluigi.TargetInfo): pass diff --git a/luiginlp/modules/ocr.py b/luiginlp/modules/ocr.py index 4acad0f..f44f768 100644 --- a/luiginlp/modules/ocr.py +++ b/luiginlp/modules/ocr.py @@ -15,7 +15,6 @@ class Tesseract(Task): executable = 'tesseract' language = Parameter() - outputdir = Parameter(default="") in_tiff = None #input slot @@ -32,7 +31,6 @@ def run(self): class OCR_singlepage(StandardWorkflowComponent): language = Parameter() tiff_extension=Parameter(default='tif') - outputdir = Parameter(default="") def autosetup(self): return Tesseract diff --git a/luiginlp/modules/ucto.py b/luiginlp/modules/ucto.py index f4ce023..9d3c4b1 100644 --- a/luiginlp/modules/ucto.py +++ b/luiginlp/modules/ucto.py @@ -49,8 +49,6 @@ class Ucto_folia2folia(Task): #Parameters for this module (all mandatory!) 
language = Parameter() - #Optional parameter - outputdir = Parameter(default="") in_folia = None #will be linked to an out_* slot of another module in the workflow specification @@ -76,7 +74,6 @@ class Ucto(StandardWorkflowComponent): language = Parameter() tok_input_sentenceperline = BoolParameter(default=False) tok_output_sentenceperline = BoolParameter(default=False) - outputdir = Parameter(default="") def autosetup(self): return (Ucto_txt2folia, Ucto_folia2folia) @@ -136,7 +133,6 @@ class Ucto_dir(StandardWorkflowComponent): language = Parameter() tok_input_sentenceperline = BoolParameter(default=False) tok_output_sentenceperline = BoolParameter(default=False) - outputdir = Parameter(default="") def autosetup(self): return (Ucto_txt2folia_dir, Ucto_folia2folia_dir)