From 2d6d3e79f4170d1807a8db4fa40298ff7f93ed6f Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Mon, 26 Feb 2024 10:46:17 -0500 Subject: [PATCH] ALS-5981: Add gene and consequence to variant spec --- .../hpds/data/genotype/VariantSpec.java | 62 +++++++++++++++++-- .../hpds/etl/genotype/NewVCFLoader.java | 13 +++- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantSpec.java b/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantSpec.java index b244e407..83359789 100644 --- a/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantSpec.java +++ b/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantSpec.java @@ -1,6 +1,7 @@ package edu.harvard.hms.dbmi.avillach.hpds.data.genotype; import java.io.Serializable; +import java.util.Objects; import org.apache.commons.csv.CSVRecord; @@ -13,7 +14,22 @@ public class VariantCoords implements Serializable { public String ref; public String alt; public int qual; - public String format; + public String format; + public String gene; + public String consequence; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + VariantCoords that = (VariantCoords) o; + return qual == that.qual && Objects.equals(chromosome, that.chromosome) && Objects.equals(offset, that.offset) && Objects.equals(name, that.name) && Objects.equals(ref, that.ref) && Objects.equals(alt, that.alt) && Objects.equals(format, that.format) && Objects.equals(gene, that.gene) && Objects.equals(consequence, that.consequence); + } + + @Override + public int hashCode() { + return Objects.hash(chromosome, offset, name, ref, alt, qual, format, gene, consequence); + } } public static int CHR = 0, OFF = 1, NAME = 2, REF = 3, ALT = 4, QUAL = 5, FILTER = 6, INFO = 7, FORMAT = 8, DATA = 9; public long heteroOffset; @@ -32,6 +48,20 @@ public VariantSpec(CSVRecord r) { }catch(NumberFormatException e) { this.metadata.qual = -1; } + + String[] variantInfo = r.get(INFO).split("[=;]"); + String gene = "NULL"; + String consequence = "NULL"; + for (int i = 0; i < variantInfo.length; i = i + 2) { + if ("Gene_with_variant".equals(variantInfo[i])) { + gene = variantInfo[i + 1]; + } + if ("Variant_consequence_calculated".equals(variantInfo[i])) { + consequence = variantInfo[i + 1]; + } + } + this.metadata.gene = gene; + this.metadata.consequence = consequence; } public VariantSpec(String variant) { @@ -43,28 +73,48 @@ public VariantSpec(String variant) { this.metadata.ref = segments[2]; this.metadata.alt = segments[3]; this.metadata.qual = -1; + this.metadata.gene = segments[4]; + this.metadata.consequence = segments[5]; } public String specNotation() { return this.metadata.chromosome + "," + this.metadata.offset + "," + - this.metadata.ref + "," + this.metadata.alt; + this.metadata.ref + "," + this.metadata.alt + "," + this.metadata.gene + "," + this.metadata.consequence; } - @Override public int compareTo(VariantSpec o) { int ret = 0; ret = this.metadata.chromosome.compareTo(o.metadata.chromosome); - if(ret == 0) { + if (ret == 0) { ret = this.metadata.offset.compareTo(o.metadata.offset); } - if(ret == 0) { + if (ret == 0) { ret = this.metadata.ref.compareTo(o.metadata.ref); } - if(ret == 0) { + if (ret == 0) { ret = this.metadata.alt.compareTo(o.metadata.alt); } + if (ret == 0) { + ret = this.metadata.gene.compareTo(o.metadata.gene); + } + if (ret == 0) { + ret = this.metadata.consequence.compareTo(o.metadata.consequence); + } + return ret; } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + VariantSpec that = (VariantSpec) o; + return heteroOffset == that.heteroOffset && homoOffset == that.homoOffset && Objects.equals(metadata, that.metadata); + } + + @Override + public int hashCode() { + return Objects.hash(heteroOffset, homoOffset, metadata); + } } \ No newline at end of file diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/NewVCFLoader.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/NewVCFLoader.java index 43f6c351..996c4078 100644 --- a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/NewVCFLoader.java +++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/NewVCFLoader.java @@ -470,8 +470,19 @@ private void setMasksForSample(char[][] zygosityMaskStrings, int index, int star } private String currentSpecNotation() { + String[] variantInfo = currentLineSplit[7].split("[=;]"); + String gene = "NULL"; + String consequence = "NULL"; + for (int i = 0; i < variantInfo.length; i = i + 2) { + if ("Gene_with_variant".equals(variantInfo[i])) { + gene = variantInfo[i + 1]; + } + if ("Variant_consequence_calculated".equals(variantInfo[i])) { + consequence = variantInfo[i + 1]; + } + } return currentLineSplit[0] + "," + currentLineSplit[1] + "," + currentLineSplit[3] + "," - + currentLineSplit[4]; + + currentLineSplit[4] + "," + gene + "," + consequence; } public void readHeaders(ConcurrentHashMap infoStores) throws IOException {