Skip to content

Commit

Permalink
ALS-7165: Update table name to include dataset id
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 committed Sep 4, 2024
1 parent 9d0a260 commit eeb971b
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.Codec;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
Expand All @@ -21,10 +20,15 @@

public class PfbWriter implements ResultWriter {

public static final String PATIENT_TABLE_PREFIX = "pic-sure-";
private Logger log = LoggerFactory.getLogger(PfbWriter.class);

private final Schema metadataSchema;
private final Schema nodeSchema;

private final String queryId;

private final String patientTableName;
private SchemaBuilder.FieldAssembler<Schema> entityFieldAssembler;

private List<String> fields;
Expand All @@ -35,8 +39,10 @@ public class PfbWriter implements ResultWriter {

private static final Set<String> SINGULAR_FIELDS = Set.of("patient_id");

public PfbWriter(File tempFile) {
file = tempFile;
public PfbWriter(File tempFile, String queryId) {
this.file = tempFile;
this.queryId = queryId;
this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId);
entityFieldAssembler = SchemaBuilder.record("entity")
.namespace("edu.harvard.dbmi")
.fields();
Expand All @@ -58,7 +64,7 @@ public PfbWriter(File tempFile) {
@Override
public void writeHeader(String[] data) {
fields = Arrays.stream(data.clone()).map(this::formatFieldName).collect(Collectors.toList());
SchemaBuilder.FieldAssembler<Schema> patientRecords = SchemaBuilder.record("patientData")
SchemaBuilder.FieldAssembler<Schema> patientRecords = SchemaBuilder.record(patientTableName)
.fields();

fields.forEach(field -> {
Expand Down Expand Up @@ -163,7 +169,7 @@ public void writeMultiValueEntity(Collection<List<List<String>>> entities) {

GenericRecord entityRecord = new GenericData.Record(entitySchema);
entityRecord.put("object", patientData);
entityRecord.put("name", "patientData");
entityRecord.put("name", patientTableName);
entityRecord.put("id", patientId);

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

import static org.junit.jupiter.api.Assertions.*;

Expand All @@ -15,7 +16,7 @@ public class PfbWriterTest {

@Test
public void writeValidPFB() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());

pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"});
List<List<String>> nullableList = new ArrayList<>();
Expand All @@ -38,21 +39,21 @@ public void writeValidPFB() {

@Test
public void formatFieldName_spacesAndBackslashes_replacedWithUnderscore() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
String formattedName = pfbWriter.formatFieldName("\\Topmed Study Accession with Subject ID\\\\");
assertEquals("_Topmed_Study_Accession_with_Subject_ID__", formattedName);
}

@Test
public void formatFieldName_startsWithDigit_prependUnderscore() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
String formattedName = pfbWriter.formatFieldName("123Topmed Study Accession with Subject ID\\\\");
assertEquals("_123Topmed_Study_Accession_with_Subject_ID__", formattedName);
}

@Test
public void formatFieldName_randomGarbage_replaceWithUnderscore() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
String formattedName = pfbWriter.formatFieldName("$$$my garbage @vro var!able nam#");
assertEquals("___my_garbage__vro_var_able_nam_", formattedName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,17 +135,18 @@ private AsyncResult initializeResult(Query query) throws ClassNotFoundException,
throw new RuntimeException("UNSUPPORTED RESULT TYPE");
}

String queryId = UUIDv5.UUIDFromString(query.toString()).toString();
ResultWriter writer;
if (ResultType.DATAFRAME_PFB.equals(query.getExpectedResultType())) {
writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"));
writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId);
} else {
writer = new CsvWriter(File.createTempFile("result-" + System.nanoTime(), ".sstmp"));
}

AsyncResult result = new AsyncResult(query, p, writer)
.setStatus(AsyncResult.Status.PENDING)
.setQueuedTime(System.currentTimeMillis())
.setId(UUIDv5.UUIDFromString(query.toString()).toString());
.setId(queryId);
query.setId(result.getId());
results.put(result.getId(), result);
return result;
Expand Down

0 comments on commit eeb971b

Please sign in to comment.