From b83eca30d56f6ab2ec616ddf367716cc5304dd3a Mon Sep 17 00:00:00 2001 From: Nobuaki Karasawa Date: Thu, 6 Feb 2025 20:47:51 +0900 Subject: [PATCH] feat: add prefer-deletion-insertion option to coding-dna --- src/varity/hgvs.clj | 2 ++ src/varity/vcf_to_hgvs.clj | 15 +++++++++++++-- src/varity/vcf_to_hgvs/coding_dna.clj | 3 ++- src/varity/vcf_to_hgvs/protein.clj | 7 +++++-- test/varity/vcf_to_hgvs_test.clj | 9 +++++++++ 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/varity/hgvs.clj b/src/varity/hgvs.clj index 9e56af4..8d9d58d 100644 --- a/src/varity/hgvs.clj +++ b/src/varity/hgvs.clj @@ -10,6 +10,8 @@ {:prefer-deletion? true} {:prefer-insertion? false} {:prefer-insertion? true} + {:prefer-deletion-insertion? false} + {:prefer-deletion-insertion? true} {:prefer-extension-for-initial-codon-alt? false} {:prefer-extension-for-initial-codon-alt? true}]) diff --git a/src/varity/vcf_to_hgvs.clj b/src/varity/vcf_to_hgvs.clj index fb1555d..11bad0f 100644 --- a/src/varity/vcf_to_hgvs.clj +++ b/src/varity/vcf_to_hgvs.clj @@ -69,6 +69,7 @@ (def ^:private default-options {:prefer-deletion? false :prefer-insertion? false + :prefer-deletion-insertion? false :prefer-extension-for-initial-codon-alt? false :tx-margin 5000 :verbose? false}) @@ -91,6 +92,10 @@ :prefer-insertion? Prefer insertion (e.g. \"c.9_10insAGG\") to repeated sequences (e.g. \"c.4_6[3]\"), default false. + :prefer-deletion-insertion? Prefer indel (e.g. \"c.18_20delATCinsGAT\") + to repeated sequences and inversion (e.g. \"c.18_20inv\"), + default false. + :tx-margin The length of transcription margin, up to a maximum of 10000, default 5000. @@ -158,8 +163,10 @@ :prefer-deletion? Prefer deletion (e.g. \"p.P7_H8del\") to repeated sequences (e.g. \"p.P5_H6[1]\"), default false. - :prefer-insertion? Prefer insertion (e.g. \"c.H9_L10insRPH\") to repeated - sequences (e.g. \"c.R4_H6[3]\"), default false. + :prefer-insertion? Prefer insertion (e.g. \"p.H9_L10insRPH\") to repeated + sequences (e.g. 
\"p.R4_H6[3]\"), default false. + + :prefer-deletion-insertion? Prefer indel to repeated sequences, default false. :prefer-extension-for-initial-codon-alt? Prefer extension to protein unknown variant that affects initial codon, default false. @@ -234,6 +241,10 @@ :prefer-insertion? Prefer insertion (e.g. \"c.9_10insAGG\") to repeated sequences (e.g. \"c.4_6[3]\"), default false. + :prefer-deletion-insertion? Prefer indel (e.g. \"c.18_20delATCinsGAT\") + to repeated sequences and inversion (e.g. \"c.18_20inv\"), + default false. + :prefer-extension-for-initial-codon-alt? Prefer extension to protein unknown variant that affects initial codon, default false. diff --git a/src/varity/vcf_to_hgvs/coding_dna.clj b/src/varity/vcf_to_hgvs/coding_dna.clj index 43ccfd9..1ff0993 100644 --- a/src/varity/vcf_to_hgvs/coding_dna.clj +++ b/src/varity/vcf_to_hgvs/coding_dna.clj @@ -68,7 +68,7 @@ :reverse (repeat-info-backward seq-rdr rg pos alt type)))) (defn- mutation-type - [seq-rdr rg pos ref alt {:keys [prefer-deletion? prefer-insertion?]}] + [seq-rdr rg pos ref alt {:keys [prefer-deletion? prefer-insertion? prefer-deletion-insertion?]}] (if (re-matches #"[acgntACGNT]*" alt) (let [[ref-only alt-only offset _] (diff-bases ref alt) nrefo (count ref-only) @@ -79,6 +79,7 @@ (or (= nrefo nalto 0) (= nrefo nalto 1)) :substitution (and prefer-deletion? (pos? nrefo) (zero? nalto)) :deletion (and prefer-insertion? (zero? nrefo) (pos? nalto)) :insertion + (and prefer-deletion-insertion? (pos? nrefo) (pos? nalto)) :indel (= ref-only (util-seq/revcomp alt-only)) :inversion (and (some? unit) (= ref-repeat 1) (= alt-repeat 2)) :duplication (and (some? unit) (pos? 
alt-repeat) diff --git a/src/varity/vcf_to_hgvs/protein.clj b/src/varity/vcf_to_hgvs/protein.clj index 78e2c8a..64fb6b0 100644 --- a/src/varity/vcf_to_hgvs/protein.clj +++ b/src/varity/vcf_to_hgvs/protein.clj @@ -382,7 +382,8 @@ [{:keys [strand] :as rg} pos ref alt {:keys [ref-exon-seq ref-prot-seq alt-exon-seq alt-rg ref-include-ter-site ref-include-from-ter-start-and-over-ter-end utr-variant] :as seq-info} - {:keys [prefer-deletion? prefer-insertion? prefer-extension-for-initial-codon-alt?]}] + {:keys [prefer-deletion? prefer-insertion? prefer-deletion-insertion? + prefer-extension-for-initial-codon-alt?]}] (cond (:overlap-exon-intron-boundary seq-info) {:type :overlap-exon-intron-boundary, :pos nil, :ref nil, :alt nil} @@ -466,7 +467,9 @@ (and prefer-deletion? (pos? nprefo) (zero? npalto)) :deletion (and prefer-insertion? (zero? nprefo) (pos? npalto)) :insertion (and (some? unit) (= ref-repeat 1) (= alt-repeat 2)) :duplication - (and (some? unit) (pos? alt-repeat)) :repeated-seqs + (and (some? unit) (pos? alt-repeat)) (if (and prefer-deletion-insertion? (pos? npref) (pos? npalto)) + :indel + :repeated-seqs) (and (pos? nprefo) (zero? npalto)) :deletion (and (pos? nprefo) (pos? npalto)) (if (= base-ppos 1) :extension diff --git a/test/varity/vcf_to_hgvs_test.clj b/test/varity/vcf_to_hgvs_test.clj index f9c0ef9..b999ee6 100644 --- a/test/varity/vcf_to_hgvs_test.clj +++ b/test/varity/vcf_to_hgvs_test.clj @@ -166,6 +166,15 @@ "chr3" 126492636 "C" "CCTCT" {:prefer-insertion? true} '("NM_001165974:c.1690-121_1690-120insAGAG" "NM_144639:c.1510-121_1510-120insAGAG") + ;; prefer-deletion-insertion? + ;; inversion cf. rs267608133 (+) + "chr2" 47806747 "AAAACTTTTTTTTTTTTTTTTTTAA" "ATTAAAAAAAAAAAAAAAAAAGTTT" {:prefer-deletion-insertion? 
true} '("NM_000179:c.4002-31_4002-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT" + "NM_001281492:c.3612-31_3612-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT" + "NM_001281493:c.3096-31_3096-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT" + "NM_001281494:c.3096-31_3096-8delAAACTTTTTTTTTTTTTTTTTTAAinsTTAAAAAAAAAAAAAAAAAAGTTT" + "NM_025133:c.*1347_*1370delTTAAAAAAAAAAAAAAAAAAGTTTinsAAACTTTTTTTTTTTTTTTTTTAA" + "NM_001190274:c.*1347_*1370delTTAAAAAAAAAAAAAAAAAAGTTTinsAAACTTTTTTTTTTTTTTTTTTAA") + ;; tx-margin "chr5" 1295113 "G" "A" {:tx-margin 5000} '("NM_001193376:c.-124C>T" "NM_198253:c.-124C>T")