From 4b55db92fbe2946215ed45269fa0652296c00a41 Mon Sep 17 00:00:00 2001 From: Anand Date: Sun, 14 Apr 2019 15:52:07 +0530 Subject: [PATCH] Issue #SC-961 feat: Include message and output timezone for Channel and Date parser --- pom.xml | 6 +- .../pinterest/secor/common/SecorConfig.java | 5 + .../parser/ChannelDateMessageParser.java | 30 ++-- .../parser/PatternDateMessageParser.java | 21 ++- .../parser/ChannelDateMessageParserTest.java | 136 ++++++++++++++++++ .../parser/PatternDateMessageParserTest.java | 135 +++++++++++++++++ 6 files changed, 311 insertions(+), 22 deletions(-) create mode 100644 src/test/java/com/pinterest/secor/parser/ChannelDateMessageParserTest.java create mode 100644 src/test/java/com/pinterest/secor/parser/PatternDateMessageParserTest.java diff --git a/pom.xml b/pom.xml index 5739dd482..8cf11abe9 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.pinterest secor - 0.24-SNAPSHOT + 0.25-SNAPSHOT jar secor Kafka to s3/gs/swift logs exporter @@ -47,8 +47,8 @@ - 1.6 - 1.6 + 1.8 + 1.8 UTF-8 UTF-8 1.9.0 diff --git a/src/main/java/com/pinterest/secor/common/SecorConfig.java b/src/main/java/com/pinterest/secor/common/SecorConfig.java index 52213160b..eb4b152d7 100644 --- a/src/main/java/com/pinterest/secor/common/SecorConfig.java +++ b/src/main/java/com/pinterest/secor/common/SecorConfig.java @@ -533,6 +533,11 @@ public TimeZone getTimeZone() { return Strings.isNullOrEmpty(timezone) ? TimeZone.getTimeZone("UTC") : TimeZone.getTimeZone(timezone); } + public TimeZone getMessageTimeZone() { + String timezone = getString("secor.message.timezone"); + return Strings.isNullOrEmpty(timezone) ? TimeZone.getTimeZone("UTC") : TimeZone.getTimeZone(timezone); + } + public boolean getBoolean(String name, boolean defaultValue) { return mProperties.getBoolean(name, defaultValue); } diff --git a/src/main/java/com/pinterest/secor/parser/ChannelDateMessageParser.java b/src/main/java/com/pinterest/secor/parser/ChannelDateMessageParser.java index 7363f43df..3bf11c7d0 100644 --- a/src/main/java/com/pinterest/secor/parser/ChannelDateMessageParser.java +++ b/src/main/java/com/pinterest/secor/parser/ChannelDateMessageParser.java @@ -18,10 +18,7 @@ */ import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; import net.minidev.json.JSONObject; @@ -56,10 +53,16 @@ public class ChannelDateMessageParser extends MessageParser { protected static final String defaultFormatter = "yyyy-MM-dd"; private Map partitionPrefixMap; private static final String channelScrubRegex = "[^a-zA-Z0-9._$-]"; + private SimpleDateFormat outputFormatter; + private TimeZone messageTimeZone; public ChannelDateMessageParser(SecorConfig config) { super(config); - partitionPrefixMap = new HashMap(); + messageTimeZone = mConfig.getMessageTimeZone(); + outputFormatter = new SimpleDateFormat( + StringUtils.defaultIfBlank(mConfig.getPartitionOutputDtFormat(), defaultFormatter)); + outputFormatter.setTimeZone(mConfig.getTimeZone()); + partitionPrefixMap = new HashMap<>(); String partitionMapping = config.getPartitionPrefixMapping(); if (null != partitionMapping) { JSONObject jsonObject = (JSONObject) JSONValue.parse(partitionMapping); @@ -74,7 +77,7 @@ public String[] extractPartitions(Message message) { JSONObject jsonObject = (JSONObject) JSONValue.parse(message.getPayload()); boolean prefixEnabled = mConfig.isPartitionPrefixEnabled(); - String result[] = { defaultDate }; + String result[] = {defaultDate}; if (jsonObject != null) { @@ -86,15 +89,18 @@ public String[] extractPartitions(Message message) { Object eventValue = jsonObject.get(mConfig.getPartitionPrefixIdentifier()); Object inputPattern = mConfig.getMessageTimestampInputPattern(); - if (fieldValue != null && inputPattern != null) { + if (inputPattern != null) { try { + /* SimpleDateFormat outputFormatter = new SimpleDateFormat( StringUtils.defaultIfBlank(mConfig.getPartitionOutputDtFormat(), defaultFormatter)); - Date dateFormat = null; + */ + Date dateFormat; if (fieldValue instanceof Number) { dateFormat = new Date(((Number) fieldValue).longValue()); } else { SimpleDateFormat inputFormatter = new SimpleDateFormat(inputPattern.toString()); + inputFormatter.setTimeZone(messageTimeZone); dateFormat = inputFormatter.parse(fieldValue.toString()); } @@ -107,7 +113,7 @@ public String[] extractPartitions(Message message) { return result; } catch (Exception e) { e.printStackTrace(); - LOG.warn("Unable to get path: " + e.getMessage() +" - " + message.getPayload()); + LOG.warn("Unable to get path: " + e.getMessage() + " - " + message.getPayload()); } } } @@ -131,15 +137,15 @@ private String getChannel(JSONObject jsonObject) { String rawChannelStr = ""; Map dimensions = (HashMap) jsonObject.get("dimensions"); Map context = (HashMap) jsonObject.get("context"); - + String channel = (String) jsonObject.get("channel"); if (channel != null && !channel.isEmpty()) { rawChannelStr = channel; } else if (dimensions != null && dimensions.get("channel") != null) { rawChannelStr = (String) dimensions.get("channel"); - } else if(context != null && context.get("channel") != null){ + } else if (context != null && context.get("channel") != null) { rawChannelStr = (String) context.get("channel"); - }else { + } else { rawChannelStr = "in.ekstep"; } return rawChannelStr.replaceAll(channelScrubRegex, ""); diff --git a/src/main/java/com/pinterest/secor/parser/PatternDateMessageParser.java b/src/main/java/com/pinterest/secor/parser/PatternDateMessageParser.java index 2045845f6..1c551ccf8 100644 --- a/src/main/java/com/pinterest/secor/parser/PatternDateMessageParser.java +++ b/src/main/java/com/pinterest/secor/parser/PatternDateMessageParser.java @@ -17,9 +17,7 @@ package com.pinterest.secor.parser; import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashMap; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; import net.minidev.json.JSONObject; @@ -53,10 +51,16 @@ public class PatternDateMessageParser extends MessageParser { protected static final String defaultDate = "1970-01-01"; protected static final String defaultFormatter = "yyyy-MM-dd"; private Map partitionPrefixMap; + private SimpleDateFormat outputFormatter; + private TimeZone messageTimeZone; public PatternDateMessageParser(SecorConfig config) { super(config); - partitionPrefixMap = new HashMap(); + messageTimeZone = config.getMessageTimeZone(); + outputFormatter = new SimpleDateFormat( + StringUtils.defaultIfBlank(mConfig.getPartitionOutputDtFormat(), defaultFormatter)); + outputFormatter.setTimeZone(config.getTimeZone()); + partitionPrefixMap = new HashMap<>(); String partitionMapping = config.getPartitionPrefixMapping(); if (null != partitionMapping) { JSONObject jsonObject = (JSONObject) JSONValue.parse(partitionMapping); @@ -73,7 +77,6 @@ public String[] extractPartitions(Message message) { boolean prefixEnabled = mConfig.isPartitionPrefixEnabled(); String result[] = { prefixEnabled ? partitionPrefixMap.get("DEFAULT") + defaultDate : defaultDate }; if (jsonObject != null) { - Object fieldValue = jsonObject.get(mConfig.getMessageTimestampName()); if (fieldValue == null) fieldValue = jsonObject.get(mConfig.getFallbackMessageTimestampName()); @@ -82,21 +85,25 @@ public String[] extractPartitions(Message message) { Object eventValue = jsonObject.get(mConfig.getPartitionPrefixIdentifier()); Object inputPattern = mConfig.getMessageTimestampInputPattern(); - if (fieldValue != null && inputPattern != null) { + if (inputPattern != null) { try { + /* SimpleDateFormat outputFormatter = new SimpleDateFormat( StringUtils.defaultIfBlank(mConfig.getPartitionOutputDtFormat(), defaultFormatter)); - Date dateFormat = null; + */ + Date dateFormat; if (fieldValue instanceof Number) { dateFormat = new Date(((Number) fieldValue).longValue()); } else { SimpleDateFormat inputFormatter = new SimpleDateFormat(inputPattern.toString()); + inputFormatter.setTimeZone(messageTimeZone); dateFormat = inputFormatter.parse(fieldValue.toString()); } result[0] = prefixEnabled ? getPrefix(eventValue.toString()) + outputFormatter.format(dateFormat) : outputFormatter.format(dateFormat); return result; } catch (Exception e) { + e.printStackTrace(); LOG.warn("Unable to get path: " + e.getMessage()); } } diff --git a/src/test/java/com/pinterest/secor/parser/ChannelDateMessageParserTest.java b/src/test/java/com/pinterest/secor/parser/ChannelDateMessageParserTest.java new file mode 100644 index 000000000..61fc57be3 --- /dev/null +++ b/src/test/java/com/pinterest/secor/parser/ChannelDateMessageParserTest.java @@ -0,0 +1,136 @@ +package com.pinterest.secor.parser; + +import com.pinterest.secor.common.SecorConfig; +import com.pinterest.secor.message.Message; +import junit.framework.TestCase; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.util.TimeZone; + +@RunWith(PowerMockRunner.class) +public class ChannelDateMessageParserTest extends TestCase { + + private SecorConfig mConfig; + private Message mFormat1; + private Message mFormat2; + private Message mFormat3; + private Message mInvalidDate; + private Message mISOFormat; + private Message mNanosecondISOFormat; + private Message mNestedISOFormat; + private long timestamp; + + @Override + public void setUp() throws Exception { + mConfig = Mockito.mock(SecorConfig.class); + Mockito.when(mConfig.getTimeZone()).thenReturn(TimeZone.getTimeZone("Asia/Kolkata")); + Mockito.when(mConfig.getMessageTimeZone()).thenReturn(TimeZone.getTimeZone("UTC")); + Mockito.when(mConfig.isPartitionPrefixEnabled()).thenReturn(true); + Mockito.when(mConfig.getPartitionPrefixIdentifier()).thenReturn("eid"); + + timestamp = System.currentTimeMillis(); + + byte format1[] = "{\"timestamp\":\"2014-07-30 22:53:20\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"dimensions\":{\"channel\":\"test-channel\"},\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mFormat1 = new Message("test", 0, 0, null, format1, timestamp); + + byte format2[] = "{\"timestamp\":\"2014/10/25\",\"eid\":\"IMPRESSION\",\"dimensions\":{\"channel\":\"test-channel\"},\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mFormat2 = new Message("test", 0, 0, null, format2, timestamp); + + byte format3[] = "{\"timestamp\":\"02001.July.04 AD 12:08 PM\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"dimensions\":{\"channel\":\"test-channel\"},\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mFormat3 = new Message("test", 0, 0, null, format3, timestamp); + + byte invalidDate[] = "{\"timestamp\":\"11111111\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"dimensions\":{\"channel\":\"test-channel\"},\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mInvalidDate = new Message("test", 0, 0, null, invalidDate, timestamp); + + byte isoFormat[] = "{\"timestamp\":\"2006-01-02T15:04:05Z\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"dimensions\":{\"channel\":\"test-channel\"},\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mISOFormat = new Message("test", 0, 0, null, isoFormat, timestamp); + + byte nanosecondISOFormat[] = "{\"timestamp\":\"2006-01-02T23:59:59.999999999Z\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"dimensions\":{\"channel\":\"test-channel\"}}" + .getBytes("UTF-8"); + mNanosecondISOFormat = new Message("test", 0, 0, null, nanosecondISOFormat, timestamp); + + byte nestedISOFormat[] = "{\"meta_data\":{\"created\":\"2016-01-11T11:50:28.647Z\"},\"eid\":\"ME_WORKFLOW_SUMMARY\",\"dimensions\":{\"channel\":\"test-channel\"},\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mNestedISOFormat = new Message("test", 0, 0, null, nestedISOFormat, timestamp); + } + + @Test + public void testExtractDateUsingInputPattern() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + assertEquals("test-channel/summary/2014-07-31", new ChannelDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy/MM/d"); + assertEquals("test-channel/raw/2014-10-25", new ChannelDateMessageParser(mConfig).extractPartitions(mFormat2)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyyy.MMMMM.dd GGG hh:mm aaa"); + assertEquals("test-channel/summary/2001-07-04", new ChannelDateMessageParser(mConfig).extractPartitions(mFormat3)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd'T'HH:mm:ss'Z'"); + assertEquals("test-channel/summary/2006-01-02", new ChannelDateMessageParser(mConfig).extractPartitions(mISOFormat)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd'T'HH:mm:ss"); + assertEquals("test-channel/summary/2006-01-02", new ChannelDateMessageParser(mConfig).extractPartitions(mISOFormat)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd'T'HH:mm:ss"); + assertEquals("test-channel/summary/2006-01-03", new ChannelDateMessageParser(mConfig).extractPartitions(mNanosecondISOFormat)[0]); + } + + @Test + public void testExtractDateWhenPrefixIsNotSet() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.isPartitionPrefixEnabled()).thenReturn(false); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + assertEquals("test-channel/2014-07-31", new ChannelDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + } + + @Test + public void testExtractDateWithWrongEntries() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getString("secor.partition.output_dt_format", "yyyy-MM-dd")).thenReturn("yyyy-MM-dd"); + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + + // invalid date + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); // any pattern + assertEquals("raw/" + ChannelDateMessageParser.defaultDate, new PatternDateMessageParser( + mConfig).extractPartitions(mInvalidDate)[0]); + + // invalid pattern + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyy-MM-dd :s"); + assertEquals("raw/" + ChannelDateMessageParser.defaultDate, new PatternDateMessageParser( + mConfig).extractPartitions(mFormat1)[0]); + } + + @Test + public void testDatePrefix() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + Mockito.when(mConfig.getString("secor.partition.output_dt_format", "yyyy-MM-dd")).thenReturn("yyyy-MM-dd"); + + assertEquals("test-channel/summary/2014-07-31", new ChannelDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + } + + @Test + public void testCustomDateFormat() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + Mockito.when(mConfig.getPartitionOutputDtFormat()).thenReturn("'yr='yyyy'/mo='MM'/dy='dd'/hr='HH"); + + assertEquals("test-channel/summary/yr=2014/mo=07/dy=31/hr=04", new ChannelDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + } + +} diff --git a/src/test/java/com/pinterest/secor/parser/PatternDateMessageParserTest.java b/src/test/java/com/pinterest/secor/parser/PatternDateMessageParserTest.java new file mode 100644 index 000000000..ba902b1ac --- /dev/null +++ b/src/test/java/com/pinterest/secor/parser/PatternDateMessageParserTest.java @@ -0,0 +1,135 @@ +package com.pinterest.secor.parser; + +import com.pinterest.secor.common.SecorConfig; +import com.pinterest.secor.message.Message; +import junit.framework.TestCase; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.util.TimeZone; + +@RunWith(PowerMockRunner.class) +public class PatternDateMessageParserTest extends TestCase { + + private SecorConfig mConfig; + private Message mFormat1; + private Message mFormat2; + private Message mFormat3; + private Message mInvalidDate; + private Message mISOFormat; + private Message mNanosecondISOFormat; + private Message mNestedISOFormat; + private long timestamp; + + @Override + public void setUp() throws Exception { + mConfig = Mockito.mock(SecorConfig.class); + Mockito.when(mConfig.getTimeZone()).thenReturn(TimeZone.getTimeZone("Asia/Kolkata")); + Mockito.when(mConfig.getMessageTimeZone()).thenReturn(TimeZone.getTimeZone("UTC")); + Mockito.when(mConfig.isPartitionPrefixEnabled()).thenReturn(true); + Mockito.when(mConfig.getPartitionPrefixIdentifier()).thenReturn("eid"); + + timestamp = System.currentTimeMillis(); + + byte format1[] = "{\"timestamp\":\"2014-07-30 22:53:20\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mFormat1 = new Message("test", 0, 0, null, format1, timestamp); + + byte format2[] = "{\"timestamp\":\"2014/10/25\",\"eid\":\"IMPRESSION\",\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mFormat2 = new Message("test", 0, 0, null, format2, timestamp); + + byte format3[] = "{\"timestamp\":\"02001.July.04 AD 12:08 PM\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mFormat3 = new Message("test", 0, 0, null, format3, timestamp); + + byte invalidDate[] = "{\"timestamp\":\"11111111\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mInvalidDate = new Message("test", 0, 0, null, invalidDate, timestamp); + + byte isoFormat[] = "{\"timestamp\":\"2006-01-02T15:04:05Z\",\"eid\":\"ME_WORKFLOW_SUMMARY\",\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mISOFormat = new Message("test", 0, 0, null, isoFormat, timestamp); + + byte nanosecondISOFormat[] = "{\"timestamp\":\"2006-01-02T23:59:59.999999999Z\",\"eid\":\"ME_WORKFLOW_SUMMARY\"}" + .getBytes("UTF-8"); + mNanosecondISOFormat = new Message("test", 0, 0, null, nanosecondISOFormat, timestamp); + + byte nestedISOFormat[] = "{\"meta_data\":{\"created\":\"2016-01-11T11:50:28.647Z\"},\"eid\":\"ME_WORKFLOW_SUMMARY\",\"id\":0,\"guid\":\"0436b17b-e78a-4e82-accf-743bf1f0b884\",\"isActive\":false,\"balance\":\"$3,561.87\",\"picture\":\"http://placehold.it/32x32\",\"age\":23,\"eyeColor\":\"green\",\"name\":\"Mercedes Brewer\",\"gender\":\"female\",\"company\":\"MALATHION\",\"email\":\"mercedesbrewer@malathion.com\",\"phone\":\"+1 (848) 471-3000\",\"address\":\"786 Gilmore Court, Brule, Maryland, 3200\",\"about\":\"Quis nostrud Lorem deserunt esse ut reprehenderit aliqua nisi et sunt mollit est. Cupidatat incididunt minim anim eiusmod culpa elit est dolor ullamco. Aliqua cillum eiusmod ullamco nostrud Lorem sit amet Lorem aliquip esse esse velit.\\r\\n\",\"registered\":\"2014-01-14T13:07:28 +08:00\",\"latitude\":47.672012,\"longitude\":102.788623,\"tags\":[\"amet\",\"amet\",\"dolore\",\"eu\",\"qui\",\"fugiat\",\"laborum\"],\"friends\":[{\"id\":0,\"name\":\"Rebecca Hardy\"},{\"id\":1,\"name\":\"Sutton Briggs\"},{\"id\":2,\"name\":\"Dena Campos\"}],\"greeting\":\"Hello, Mercedes Brewer! You have 7 unread messages.\",\"favoriteFruit\":\"strawberry\"}" + .getBytes("UTF-8"); + mNestedISOFormat = new Message("test", 0, 0, null, nestedISOFormat, timestamp); + } + + @Test + public void testExtractDateUsingInputPattern() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + assertEquals("summary/2014-07-31", new PatternDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy/MM/d"); + assertEquals("raw/2014-10-25", new PatternDateMessageParser(mConfig).extractPartitions(mFormat2)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyyy.MMMMM.dd GGG hh:mm aaa"); + assertEquals("summary/2001-07-04", new PatternDateMessageParser(mConfig).extractPartitions(mFormat3)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd'T'HH:mm:ss'Z'"); + assertEquals("summary/2006-01-02", new PatternDateMessageParser(mConfig).extractPartitions(mISOFormat)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd'T'HH:mm:ss"); + assertEquals("summary/2006-01-02", new PatternDateMessageParser(mConfig).extractPartitions(mISOFormat)[0]); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd'T'HH:mm:ss"); + assertEquals("summary/2006-01-03", new PatternDateMessageParser(mConfig).extractPartitions(mNanosecondISOFormat)[0]); + } + + @Test + public void testExtractDateWhenPrefixIsNotSet() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.isPartitionPrefixEnabled()).thenReturn(false); + + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + assertEquals("2014-07-31", new PatternDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + } + + @Test + public void testExtractDateWithWrongEntries() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getString("secor.partition.output_dt_format", "yyyy-MM-dd")).thenReturn("yyyy-MM-dd"); + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + + // invalid date + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); // any pattern + assertEquals("raw/" + PatternDateMessageParser.defaultDate, new PatternDateMessageParser( + mConfig).extractPartitions(mInvalidDate)[0]); + + // invalid pattern + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyy-MM-dd :s"); + assertEquals("raw/" + PatternDateMessageParser.defaultDate, new PatternDateMessageParser( + mConfig).extractPartitions(mFormat1)[0]); + } + + @Test + public void testDatePrefix() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + Mockito.when(mConfig.getString("secor.partition.output_dt_format", "yyyy-MM-dd")).thenReturn("yyyy-MM-dd"); + + assertEquals("summary/2014-07-31", new PatternDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + } + + @Test + public void testCustomDateFormat() throws Exception { + Mockito.when(mConfig.getMessageTimestampName()).thenReturn("timestamp"); + Mockito.when(mConfig.getMessageTimestampInputPattern()).thenReturn("yyyy-MM-dd HH:mm:ss"); + Mockito.when(mConfig.getPartitionPrefixMapping()).thenReturn("{\"ME_WORKFLOW_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}"); + Mockito.when(mConfig.getPartitionOutputDtFormat()).thenReturn("'yr='yyyy'/mo='MM'/dy='dd'/hr='HH"); + + assertEquals("summary/yr=2014/mo=07/dy=31/hr=04", new PatternDateMessageParser(mConfig).extractPartitions(mFormat1)[0]); + } +}