Skip to content

Commit

Permalink
fix: add termination site deletion condition to delins
Browse files Browse the repository at this point in the history
  • Loading branch information
nokara26 committed Dec 22, 2023
1 parent 3cb346e commit 367a363
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 22 deletions.
68 changes: 46 additions & 22 deletions src/varity/vcf_to_hgvs/protein.clj
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,21 @@
(and (seq ref-only) (empty? alt-only)) [ref-only :del])]
(common/repeat-info seq* pos alt type)))

(defn- get-first-diff-aa-info
[pos ref-seq alt-seq]
(->> (map vector ref-seq alt-seq)
(drop (dec pos))
(map-indexed vector)
(drop-while (fn [[_ [r a]]] (= r a)))
first
((fn [[i [r a]]]
{:ppos (+ pos i)
:pref (str r)
:palt (str a)}))))

(defn- ->protein-variant
[{:keys [strand] :as rg} pos ref alt
{:keys [ref-exon-seq ref-prot-seq alt-exon-seq alt-rg] :as seq-info}
{:keys [ref-exon-seq ref-prot-seq alt-exon-seq alt-rg ref-include-ter-site] :as seq-info}
{:keys [prefer-deletion? prefer-insertion?]}]
(cond
(= ref-exon-seq alt-exon-seq)
Expand Down Expand Up @@ -277,7 +289,9 @@
(= palt (subs pref 0 (count palt))))
(= (first palt-only) \*))
:fs-ter-substitution
:frame-shift)
(if ref-include-ter-site
:indel
:frame-shift))
(or (and (zero? nprefo) (zero? npalto))
(and (= nprefo 1) (= npalto 1))) :substitution
(and prefer-deletion? (pos? nprefo) (zero? npalto)) :deletion
Expand Down Expand Up @@ -347,16 +361,32 @@
(->> (seq ins) (map mut/->long-amino-acid)))))

(defn- protein-indel
[ppos pref palt]
(let [[del ins offset _] (diff-bases pref palt)
ndel (count del)]
(mut/protein-indel (mut/->long-amino-acid (first del))
(coord/protein-coordinate (+ ppos offset))
(when (> ndel 1)
(mut/->long-amino-acid (last del)))
(when (> ndel 1)
(coord/protein-coordinate (+ ppos offset ndel -1)))
(->> (seq ins) (map mut/->long-amino-acid)))))
[ppos pref palt {:keys [ref-prot-seq c-ter-adjusted-alt-prot-seq ref-include-ter-site]}]
(let [[pref palt ppos] (if ref-include-ter-site
(let [{:keys [ppos]} (get-first-diff-aa-info ppos ref-prot-seq c-ter-adjusted-alt-prot-seq)
get-seq-between-pos-ter-site (fn [seq pos]
(-> (pstring/split-at seq (dec pos))
last
(string/split #"\*")
first))
pref (get-seq-between-pos-ter-site ref-prot-seq ppos)
palt (get-seq-between-pos-ter-site c-ter-adjusted-alt-prot-seq ppos)]
[pref palt ppos])
[pref palt ppos])
[del ins offset _] (diff-bases pref palt)
ndel (count del)
include-ter-site? (if ref-include-ter-site
(string/includes? (subs c-ter-adjusted-alt-prot-seq (dec ppos)) "*")
true)]
(if include-ter-site?
(mut/protein-indel (mut/->long-amino-acid (first del))
(coord/protein-coordinate (+ ppos offset))
(when (> ndel 1)
(mut/->long-amino-acid (last del)))
(when (> ndel 1)
(coord/protein-coordinate (+ ppos offset ndel -1)))
(->> (seq ins) (map mut/->long-amino-acid)))
(mut/protein-unknown-mutation))))

(defn- protein-repeated-seqs
[ppos pref palt seq-info]
Expand All @@ -375,17 +405,11 @@
alt-repeat)))

(defn- protein-frame-shift
[ppos seq-info]
(let [[ppos pref palt] (->> (map vector (:ref-prot-seq seq-info) (:alt-prot-seq seq-info))
(drop (dec ppos))
(map-indexed vector)
(drop-while (fn [[_ [r a]]] (= r a)))
first
((fn [[i [r a]]]
[(+ ppos i) (str r) (str a)])))
[ppos {:keys [ref-prot-seq alt-prot-seq] :as seq-info}]
(let [{:keys [ppos pref palt]} (get-first-diff-aa-info ppos ref-prot-seq alt-prot-seq)
[_ _ offset _] (diff-bases pref palt)
alt-prot-seq (format-alt-prot-seq seq-info)
ref (nth (:ref-prot-seq seq-info) (dec (+ ppos offset)))
ref (nth ref-prot-seq (dec (+ ppos offset)))
alt (nth alt-prot-seq (dec (+ ppos offset)))
ter-site (-> seq-info
format-alt-prot-seq
Expand Down Expand Up @@ -433,7 +457,7 @@
:deletion (protein-deletion ppos pref palt)
:duplication (protein-duplication ppos pref palt)
:insertion (protein-insertion ppos pref palt seq-info)
:indel (protein-indel ppos pref palt)
:indel (protein-indel ppos pref palt seq-info)
:repeated-seqs (protein-repeated-seqs ppos pref palt seq-info)
:frame-shift (protein-frame-shift ppos seq-info)
:extension (protein-extension ppos pref palt seq-info)
Expand Down
9 changes: 9 additions & 0 deletions test/varity/vcf_to_hgvs/protein_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,15 @@
[15 20] 15
[5 10] 5)))

(deftest get-first-diff-aa-info-test
(let [ref-seq "ABCDEFGHIJKLMN"]
(are [p alt-seq pos] (= (#'prot/get-first-diff-aa-info pos
ref-seq
alt-seq)
p)
{:ppos 6 :pref "F" :palt "G"} "ABCDEG" 4
{:ppos 13 :pref "M" :palt "K"} "ACBDEFGHIJKLK" 10)))

(def ref-gene-EGFR
{:bin 125
:name "NM_005228"
Expand Down
15 changes: 15 additions & 0 deletions test/varity/vcf_to_hgvs_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@
"chr2" 47445589 "CTTACTGAT" "CCC" '("p.L440_D442delinsP" "p.L374_D376delinsP") ; cf. rs63749931 (+)
"chr1" 152111364 "TGC" "TCG" '("p.E617_Q618delinsDE") ; cf. rs35444647 (-)

;; indel include stop codon deletion
"chr8" 116847497 "TCCTTATATAATATGGAACCTTGGTCCAGGTGTTGCGATGATGTCACTGTA" "T" '("p.Y617_I631delinsS")

;; repeated sequences
"chr1" 47438996 "T" "TCCGCAC" '("p.P286_H287[5]") ; cf. rs3046924 (+)
"chr1" 11796319 "C" "CGGCGGC" '("p.A222[3]") ; not actual example (-)
Expand All @@ -230,19 +233,31 @@
"p.S1415Ifs*2") ; https://github.com/chrovis/varity/issues/58
"chr17" 31261816 "CC" "C" '("p.N1542Tfs*11" "p.N1563Tfs*11") ; cf. rs1555619041 (+)

;; Frame shift without termination site
"chr17" 81537070 "G" "GTA" '("p.W514Cfs*?" "p.W490Cfs*?") ; not actual example (+)
"chr17" 9771493 "CCT" "C" '("p.E310Gfs*?" "p.E311Gfs*?") ; not actual example (-)

;; Extension
"chr2" 188974490 "A" "C" '("p.M1Lext-23")
"chr2" 189011772 "T" "C" '("p.*1467Qext*45") ; cf. ClinVar 101338
"chr11" 125655318 "TGA" "TAT" '("p.*477Yext*17" "p.*443Yext*17" "p.*477Yext*24")
;; NOTE: There are very few correct examples...

;; Extension without termination site
"chr17" 81537077 "CT" "C" '("p.*517ext*?" "p.*493ext*?") ; not actual example (+)
"chr17" 9771487 "GT" "G" '("p.*312Yext*?" "p.*313Yext*?") ; not actual example (-)

;; no effect
"chr7" 55181876 "A" "T" '("p.=") ; not actual example (+)
"chr7" 55181874 "TGAT" "T" '("p.=") ; not actual example (+)
"chr7" 55181876 "A" "AGGT" '("p.=") ; not actual example (+)

;; unknown
"chr12" 40393453 "G" "A" '("p.?") ; not actual example (+)

;; unknown because variant includes termination site and alternative termination site is not found
"chr17" 81537074 "GTACTGAGGC" "G" '("p.?") ; not actual example(+)
"chr17" 9771484 "GCAGTTACC" "G" '("p.?") ; not actual example(-)
)))

(cavia-testing "options"
Expand Down

0 comments on commit 367a363

Please sign in to comment.