WIP:
* add separate log parsing and filtering methods to the LogViewer class that return generators
* start implementing find_log() function
mhidas committed Dec 14, 2017
1 parent 163bff3 commit 4e95ee4
Showing 1 changed file with 93 additions and 45 deletions: aodncore/bin/logview.py
@@ -6,6 +6,7 @@

import argparse
from collections import OrderedDict
import os
import re
import sys

@@ -34,87 +35,134 @@ class LogViewer(object):
    """

    def __init__(self, logfile):
        assert logfile, 'No log file specified!'
        assert os.path.isfile(logfile), '{logfile}: no such file!'.format(logfile=logfile)
        self.logfile = logfile

    def log_entries(self):
        """Parse the log and yield a tuple (raw, data) for one log entry at a time, where
        raw is the full text of the entry from the log, and data is a dictionary of extracted
        fields as per INPUT_REGEX.
        """
        # TODO: option to read from stdin
        with open(self.logfile) as log:
            for line in log:
                line = line.strip()

                # parse a line of log data
                m = INPUT_REGEX.match(line)
                if m is None:
                    # TODO: deal with unformatted lines
                    continue
                data = m.groupdict()
                yield line, data

    def filtered_entries(self, task_id=None, levels=None, pattern=None):
        """
        Filter the tuples returned by log_entries according to the filters specified.
        :param str task_id: only include log entries for the given task uuid
        :param tuple levels: only include entries whose log level is in the given sequence
        :param str pattern: only include log messages matching pattern (regular expression)
        :return: tuples (raw, data) as for log_entries
        """
        if pattern:
            pattern = re.compile(pattern)

        for raw, data in self.log_entries():
            if task_id and data['task_id'] != task_id:
                continue
            if levels and data['level'] not in levels:
                continue
            if pattern and not pattern.search(data['message']):
                continue
            # TODO: filter by handler step?
            yield raw, data

    def show(self, task_id=None, levels=None, pattern=None, fmt=DEFAULT_FORMAT):
        """
        Print a filtered & re-formatted view of the log to stdout.
        :param str task_id: only include log entries for the given task uuid
        :param tuple levels: only include entries whose log level is in the given sequence
        :param str pattern: only include log messages matching pattern (regular expression)
        :param str fmt: output format (fmt.format() applied to dict of LOG_FIELDS extracted from log)
        """
        for raw, data in self.filtered_entries(task_id=task_id, levels=levels, pattern=pattern):
            line_out = fmt.format(**data)
            try:
                sys.stdout.write(line_out)
                sys.stdout.flush()
            except IOError:
                # this can happen if output is piped to `head` or `less`
                pass
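
To make the new generator pipeline concrete, a rough usage sketch (not part of the commit; the path is invented):

    # hypothetical driver: iterate over errors directly instead of via show()
    viewer = LogViewer('/var/log/pipeline/tasks.example.log')  # made-up path
    for raw, data in viewer.filtered_entries(levels=('ERROR', 'CRITICAL')):
        # data holds the INPUT_REGEX named groups, e.g. 'level', 'task_id', 'message'
        print('{level}: {message}'.format(**data))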


def find_log(input_file):
    """
    Given the name of an uploaded file, find the log file(s) from the pipeline process that handled it.
    :param str input_file: Name of uploaded file
    :return: List of full paths to log files
    """
    # first, if the input file name includes a task_id at the end, remove it
    filename = os.path.basename(input_file)
    assert filename, 'No input file name provided!'
    match = re.match(
        r"(.+?)([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$",
        filename
    )
    filename, task_id = match.groups()
    task_name_pattern = re.compile(r"task_name='(tasks.\w+)'.*pathname='.*{fn}'".format(fn=filename))

    logfiles = []
    # read LOG_WATCH file and find the file name
    with open(LOG_WATCH) as watchlog:
        for line in watchlog:
            match = task_name_pattern.search(line)
            if match:
                logfiles.append(
                    os.path.join(LOGDIR_PROCESS, '{}.log'.format(match.group(1)))
                )

    return logfiles
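
A quick check of what the filename-splitting regex above does; the input filename here is invented for illustration:

    # hypothetical example: uploaded file with a task uuid appended to its name
    import re
    m = re.match(
        r"(.+?)([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$",
        'IMOS_test_file.nc.c0ffee00-1234-4abc-8def-0123456789ab'
    )
    print(m.groups())
    # -> ('IMOS_test_file.nc.', 'c0ffee00-1234-4abc-8def-0123456789ab')
    # note the separator (here a '.') stays on the end of the filename group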


def parse_args():
    """Parse the command line"""
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--logfile', help='path to pipeline log file')
    parser.add_argument('-t', '--task_name', help='log for pipeline task')
    parser.add_argument('-i', '--task_id', help='filter by task_id', metavar='ID')
    parser.add_argument('-e', '--errors', help='error lines only', action='store_true')
    parser.add_argument('-w', '--warnings', help='warning & error lines only', action='store_true')
    parser.add_argument('-p', '--pattern', help='lines matching regex pattern', metavar='REGEX')
    parser.add_argument('-f', '--file', help='name of processed file')

    args = parser.parse_args()

    if not args.logfile:
        if args.task_name:
            args.logfile = os.path.join(LOGDIR_PROCESS, 'tasks.{}.log'.format(args.task_name))
        if args.file:
            args.logfile = find_log(args.file)

    args.levels = None
    if args.errors:
        args.levels = ('ERROR', 'CRITICAL')
    if args.warnings:
        args.levels = ('WARNING', 'ERROR', 'CRITICAL')

    print('Args: {}\n'.format(args))

    return args
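
For orientation, the flag set above is meant to support invocations along these lines (shown as comments; the task name is made up):

    # logview.py -t harvest_file -e          # errors only, from tasks.harvest_file.log
    # logview.py -f IMOS_test_file.nc -w     # find the log via a processed file, warnings & errors
    # logview.py -l /path/to/pipeline.log -p 'checksum'   # explicit log, message pattern filter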

@@ -125,6 +173,6 @@ def parse_args():

# TODO: filter by file name (parent or child)

lv = LogViewer(args.logfile)
lv.show(task_id=args.task_id, levels=args.levels, pattern=args.pattern)

exit(0)
