WIP:
* add separate log parsing and filtering methods to the LogViewer class that return generators
* start implementing find_log() function
mhidas committed Dec 14, 2017
1 parent 163bff3 commit 4e95ee4
Showing 1 changed file with 93 additions and 45 deletions: aodncore/bin/logview.py
@@ -6,6 +6,7 @@

import argparse
from collections import OrderedDict
import os
import re
import sys

@@ -34,87 +35,134 @@ class LogViewer(object):
    """

    def __init__(self, logfile):
        assert logfile, 'No log file specified!'
        assert os.path.isfile(logfile), '{logfile}: no such file!'.format(logfile=logfile)
        self.logfile = logfile

    def log_entries(self):
        """Parse the log and yield a tuple (raw, data) for one log entry at a time, where
        raw is the full text of the entry from the log, and data is a dictionary of extracted
        fields as per INPUT_REGEX.
        """
        # TODO: option to read from stdin
        with open(self.logfile) as log:
            for line in log:
                line = line.strip()

                # parse a line of log data
                m = INPUT_REGEX.match(line)
                if m is None:
                    # TODO: deal with unformatted lines
                    continue
                data = m.groupdict()
                yield line, data

    def filtered_entries(self, task_id=None, levels=None, pattern=None):
        """
        Filter the tuples returned by log_entries according to the filters specified.
        :param str task_id: only include log entries for the given task uuid
        :param tuple levels: only include entries whose log level is in the given sequence
        :param str pattern: only include log messages matching pattern (regular expression)
        :return: tuples (raw, data) as for log_entries
        """
        if pattern:
            pattern = re.compile(pattern)

        for raw, data in self.log_entries():
            if task_id and data['task_id'] != task_id:
                continue
            if levels and data['level'] not in levels:
                continue
            if pattern and not pattern.search(data['message']):
                continue
            # TODO: filter by handler step?
            yield raw, data

    def show(self, task_id=None, levels=None, pattern=None, fmt=DEFAULT_FORMAT):
        """
        Print a filtered & re-formatted view of the log to stdout.
        :param str task_id: only include log entries for the given task uuid
        :param tuple levels: only include entries whose log level is in the given sequence
        :param str pattern: only include log messages matching pattern (regular expression)
        :param str fmt: output format (fmt.format() applied to dict of LOG_FIELDS extracted from log)
        """
        for raw, data in self.filtered_entries(task_id=task_id, levels=levels, pattern=pattern):
            line_out = fmt.format(**data)
            try:
                sys.stdout.write(line_out)
                sys.stdout.flush()
            except IOError:
                # this can happen if output is piped to `head` or `less`
                pass
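
To make the new generator pipeline concrete, a rough usage sketch (not part of the commit; the path is invented):

    # hypothetical driver: iterate over errors directly instead of via show()
    viewer = LogViewer('/var/log/pipeline/tasks.example.log')  # made-up path
    for raw, data in viewer.filtered_entries(levels=('ERROR', 'CRITICAL')):
        # data holds the INPUT_REGEX named groups, e.g. 'level', 'task_id', 'message'
        print('{level}: {message}'.format(**data))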


def find_log(input_file):
    """
    Given the name of an uploaded file, find the log file(s) from the pipeline process that handled it.
    :param str input_file: Name of uploaded file
    :return: List of full paths to log files
    """
    # first, if the input file name includes a task_id at the end, remove it
    filename = os.path.basename(input_file)
    assert filename, 'No input file name provided!'
    match = re.match(
        r"(.+?)([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$",
        filename
    )
    filename, task_id = match.groups()
    task_name_pattern = re.compile(r"task_name='(tasks.\w+)'.*pathname='.*{fn}'".format(fn=filename))

    logfiles = []
    # read LOG_WATCH file and find the file name
    with open(LOG_WATCH) as watchlog:
        for line in watchlog:
            match = task_name_pattern.search(line)
            if match:
                logfiles.append(
                    os.path.join(LOGDIR_PROCESS, '{}.log'.format(match.group(1)))
                )

    return logfiles
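
A quick check of what the filename-splitting regex above does; the input filename here is invented for illustration:

    # hypothetical example: uploaded file with a task uuid appended to its name
    import re
    m = re.match(
        r"(.+?)([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$",
        'IMOS_test_file.nc.c0ffee00-1234-4abc-8def-0123456789ab'
    )
    print(m.groups())
    # -> ('IMOS_test_file.nc.', 'c0ffee00-1234-4abc-8def-0123456789ab')
    # note the separator (here a '.') stays on the end of the filename group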


def parse_args():
    """Parse the command line"""
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--logfile', help='path to pipeline log file')
    parser.add_argument('-t', '--task_name', help='log for pipeline task')
    parser.add_argument('-i', '--task_id', help='filter by task_id', metavar='ID')
    parser.add_argument('-e', '--errors', help='error lines only', action='store_true')
    parser.add_argument('-w', '--warnings', help='warning & error lines only', action='store_true')
    parser.add_argument('-p', '--pattern', help='lines matching regex pattern', metavar='REGEX')
    parser.add_argument('-f', '--file', help='name of processed file')

    args = parser.parse_args()

    if not args.logfile:
        if args.task_name:
            args.logfile = os.path.join(LOGDIR_PROCESS, 'tasks.{}.log'.format(args.task_name))
        if args.file:
            args.logfile = find_log(args.file)

    args.levels = None
    if args.errors:
        args.levels = ('ERROR', 'CRITICAL')
    if args.warnings:
        args.levels = ('WARNING', 'ERROR', 'CRITICAL')

    print('Args: {}\n'.format(args))

    return args
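
For orientation, the flag set above is meant to support invocations along these lines (shown as comments; the task name is made up):

    # logview.py -t harvest_file -e          # errors only, from tasks.harvest_file.log
    # logview.py -f IMOS_test_file.nc -w     # find the log via a processed file, warnings & errors
    # logview.py -l /path/to/pipeline.log -p 'checksum'   # explicit log, message pattern filter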

@@ -125,6 +173,6 @@ def parse_args():

# TODO: filter by file name (parent or child)

lv = LogViewer(args.logfile)
lv.show(task_id=args.task_id, levels=args.levels, pattern=args.pattern)

exit(0)
