diff --git a/es_stream_logs.py b/es_stream_logs.py
index 2a6ff3b..08e9d63 100644
--- a/es_stream_logs.py
+++ b/es_stream_logs.py
@@ -643,6 +643,14 @@ def to_raw_es_query(query):
 
 def parse_doc_timestamp(timestamp: str):
     """ Parse the timestamp of an elasticsearch document. """
+
+    sub_second_split = timestamp.split(sep=".", maxsplit=1)
+    if len(sub_second_split) > 1 and len(sub_second_split[1]) > 7:
+        # sub second part too long, e.g. .1234567Z and strptime supports only
+        # up to 6 places (plus 'Z' timezone part)
+        sub_second_shortened = sub_second_split[1][:6] + sub_second_split[1][-1]
+        timestamp = sub_second_split[0] + "." + sub_second_shortened
+
     try:
         parsed = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
     except ValueError:
diff --git a/test_es_stream_logs.py b/test_es_stream_logs.py
index 23e5136..8e04d0f 100644
--- a/test_es_stream_logs.py
+++ b/test_es_stream_logs.py
@@ -1,7 +1,8 @@
+import datetime
 import time
 import unittest
 
-from es_stream_logs import parse_timestamp
+from es_stream_logs import parse_doc_timestamp, parse_timestamp
 
 
 class ParseTimestampTestCase(unittest.TestCase):
@@ -44,3 +45,26 @@ def test_relative_days(self):
     def test_epoch_millis(self):
         self.assertEqual(0, parse_timestamp("0"))
         self.assertEqual(1635774591, parse_timestamp("1635774591000"))
+
+
+class ParseDocTimestampTestCase(unittest.TestCase):
+    def test_full(self):
+        self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0),
+                         parse_doc_timestamp('1970-01-01T00:00:00Z'))
+        self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0),
+                         parse_doc_timestamp('1970-01-01T00:00:00.000Z'))
+        self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0, 0, 123000),
+                         parse_doc_timestamp('1970-01-01T00:00:00.123Z'))
+        self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0, 0, 123456),
+                         parse_doc_timestamp('1970-01-01T00:00:00.123456Z'))
+
+    def test_too_long(self):
+        self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0, 0, 123456),
+                         parse_doc_timestamp('1970-01-01T00:00:00.123456999Z'))
+        self.assertEqual(datetime.datetime(1970, 1, 1, 0, 0, 0, 123456),
+                         parse_doc_timestamp('1970-01-01T00:00:00.1234569999999999999999Z'))
+
+    def test_invalid(self):
+        self.assertRaises(ValueError, lambda: parse_doc_timestamp("not a timestamp"))
+
+        self.assertRaises(ValueError, lambda: parse_doc_timestamp('1970-01-01T00:00:00+01:00'))