From cb53e96a24d7a9395528229668b6b9123a16783b Mon Sep 17 00:00:00 2001
From: Luke Sikina
Date: Tue, 23 Jan 2024 11:10:03 -0500
Subject: [PATCH] [ALS-5755] Switch time series processor to ISO timestamps

- Make service that does this
- Isolate time series logic a bit more
- Tests
---
 .../hpds/processing/AbstractProcessor.java    |  2 +-
 .../TimeSeriesConversionService.java          | 18 ++++++++++++
 .../{ => timeseries}/TimeseriesProcessor.java | 28 +++++++++++++++----
 .../TimeSeriesConversionServiceTest.java      | 22 +++++++++++++++
 .../avillach/hpds/service/QueryService.java   |  1 +
 5 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java
 rename processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/{ => timeseries}/TimeseriesProcessor.java (82%)
 create mode 100644 processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java

diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
index 8276e3bb..7a1a5063 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
@@ -672,7 +672,7 @@ protected PhenoCube getCube(String path) {
      * Useful for federated pic-sure's where there are fewer
      * guarantees about concept paths.
*/
-    protected Optional> nullableGetCube(String path) {
+    public Optional> nullableGetCube(String path) {
         try {
             return Optional.ofNullable(store.get(path));
         } catch (InvalidCacheLoadException | ExecutionException e) {
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java
new file mode 100644
index 00000000..2f2229f8
--- /dev/null
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java
@@ -0,0 +1,49 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries;
+
+import org.springframework.stereotype.Service;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.TimeZone;
+
+/**
+ * Turns epoch-millisecond timestamps into ISO-8601 style date-time strings.
+ */
+@Service
+public class TimeSeriesConversionService {
+
+    /**
+     * Pattern handed to {@link SimpleDateFormat}. The trailing {@code XXX} prints the ISO-8601
+     * zone designator: {@code Z} for a zero offset, {@code +hh:mm} or {@code -hh:mm} otherwise.
+     * A quoted literal {@code 'Z'} would label every result as UTC regardless of the zone the
+     * date and time fields were rendered in.
+     * <p>
+     * NOTE(review): the single {@code S} prints the millisecond value without zero padding, so
+     * 5 ms renders as ".5". It is kept because existing output ("1970-01-01T00:00:00.0Z" for
+     * the epoch) has that shape; switching to {@code SSS} changes it to ".000Z".
+     */
+    private static final String ISO_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SXXX";
+
+    /**
+     * Formats a timestamp as a date-time string in the given time zone.
+     *
+     * @param unixTimeStamp milliseconds since 1970-01-01T00:00:00 UTC, as taken by {@link Date#Date(long)}
+     * @param timeZone zone used for the date/time fields and for the trailing zone designator
+     * @return the formatted timestamp, e.g. "1970-01-01T00:00:00.0Z" for 0 in UTC
+     * @throws NullPointerException if either argument is null
+     */
+    public String toISOString(Long unixTimeStamp, TimeZone timeZone) {
+        if (unixTimeStamp == null) {
+            throw new NullPointerException("unixTimeStamp must not be null");
+        }
+        if (timeZone == null) {
+            throw new NullPointerException("timeZone must not be null");
+        }
+        // SimpleDateFormat is not thread-safe, so each call builds its own instance.
+        // Locale.ROOT keeps the calendar and digits independent of the server's default locale.
+        SimpleDateFormat isoFormat = new SimpleDateFormat(ISO_PATTERN, Locale.ROOT);
+        isoFormat.setTimeZone(timeZone);
+        return isoFormat.format(new Date(unixTimeStamp));
+    }
+}
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeseriesProcessor.java
similarity index 82%
rename from processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java
rename to processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeseriesProcessor.java
index 2eabdf1d..790e30b7 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeseriesProcessor.java
@@ -1,9 +1,13 @@
-package edu.harvard.hms.dbmi.avillach.hpds.processing;
+package 
edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.*;
 
+import edu.harvard.hms.dbmi.avillach.hpds.processing.AbstractProcessor;
+import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult;
+import edu.harvard.hms.dbmi.avillach.hpds.processing.HpdsProcessor;
+import edu.harvard.hms.dbmi.avillach.hpds.processing.QueryProcessor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -36,14 +40,16 @@ public class TimeseriesProcessor implements HpdsProcessor {
     private Logger log = LoggerFactory.getLogger(QueryProcessor.class);
 
     private AbstractProcessor abstractProcessor;
+    private final TimeSeriesConversionService conversionService;
 
     private final String ID_CUBE_NAME;
     private final int ID_BATCH_SIZE;
     private final int CACHE_SIZE;
 
     @Autowired
-    public TimeseriesProcessor(AbstractProcessor abstractProcessor) {
+    public TimeseriesProcessor(AbstractProcessor abstractProcessor, TimeSeriesConversionService conversionService) {
         this.abstractProcessor = abstractProcessor;
+        this.conversionService = conversionService;
         // todo: handle these via spring annotations
         CACHE_SIZE = Integer.parseInt(System.getProperty("CACHE_SIZE", "100"));
         ID_BATCH_SIZE = Integer.parseInt(System.getProperty("ID_BATCH_SIZE", "0"));
@@ -115,14 +121,24 @@ private void addDataForConcepts(Collection pathList, Set exporte
                 if (cube.isStringType()) {
                     KeyAndValue keyAndValue = (KeyAndValue) kvObj;
                     // "PATIENT_NUM","CONCEPT_PATH","NVAL_NUM","TVAL_CHAR","TIMESTAMP"
-                    String[] entryData = { keyAndValue.getKey().toString(), conceptPath, "", keyAndValue.getValue(),
-                            keyAndValue.getTimestamp().toString() };
+                    String[] entryData = {
+                        keyAndValue.getKey().toString(),
+                        conceptPath,
+                        "",
+                        keyAndValue.getValue(),
+                        conversionService.toISOString(keyAndValue.getTimestamp(), TimeZone.getTimeZone("UTC"))
+                    };
                     dataEntries.add(entryData);
                 } else { // numeric
                     KeyAndValue keyAndValue = (KeyAndValue) kvObj;
                     // 
"PATIENT_NUM","CONCEPT_PATH","NVAL_NUM","TVAL_CHAR","TIMESTAMP"
-                    String[] entryData = { keyAndValue.getKey().toString(), conceptPath,
-                            keyAndValue.getValue().toString(), "", keyAndValue.getTimestamp().toString() };
+                    String[] entryData = {
+                        keyAndValue.getKey().toString(),
+                        conceptPath,
+                        keyAndValue.getValue().toString(),
+                        "",
+                        conversionService.toISOString(keyAndValue.getTimestamp(), TimeZone.getTimeZone("UTC"))
+                    };
                     dataEntries.add(entryData);
                 }
                 //batch exports so we don't take double memory (valuesForKeys + dataEntries could be a lot of data points)
diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java
new file mode 100644
index 00000000..6fc61467
--- /dev/null
+++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java
@@ -0,0 +1,29 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries;
+
+import org.junit.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import java.util.TimeZone;
+
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Unit tests for {@link TimeSeriesConversionService}.
+ */
+public class TimeSeriesConversionServiceTest {
+
+    TimeSeriesConversionService subject = new TimeSeriesConversionService();
+
+    /**
+     * Timestamp 0 formatted in UTC should come out as "1970-01-01T00:00:00.0Z".
+     */
+    @Test
+    public void shouldConvertToIsoString() {
+        TimeZone utc = TimeZone.getTimeZone("UTC");
+
+        String formatted = subject.toISOString(0L, utc);
+
+        assertEquals("1970-01-01T00:00:00.0Z", formatted);
+    }
+}
\ No newline at end of file
diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
index fe120904..c1b69c3b 100644
--- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
+++ 
b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java @@ -7,6 +7,7 @@ import java.util.function.Predicate; import java.util.stream.Collectors; +import edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries.TimeseriesProcessor; import edu.harvard.hms.dbmi.avillach.hpds.service.util.QueryDecorator; import org.slf4j.Logger; import org.slf4j.LoggerFactory;