Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: (#405) somatic variant qc #407

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
finalizing somatic_variant_checking
  • Loading branch information
giacuong171 committed Aug 2, 2023
commit e9cb75b3cea118e761039cc394cb078b21b341a5
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,17 @@ def get_contigs_from_bed_file(bedfile):


def get_variant_type(ref, alt):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we discussed before: you may want to strip any "-" characters from ref and alt before checking their lengths (this would alleviate problems with admittedly badly encoded variants).
You then also have to handle the case of insertions (len(alt) > len(ref)), which should be classified as indels as well.

ref = ref.replace("-", "")
alt = alt.replace("-", "")
variant_type = "unknown"
if len(ref) == len(alt):
if len(alt) == 1:
return "SNV"
if len(ref) == 1:
variant_type = "snv"
else:
return "ONV"
elif len(alt) < len(ref):
return "indel"
variant_type = "onv"
else:
variant_type = "indel"
return variant_type


def check_sp_read(variant, pos_sample, minimal, limited):
Expand Down Expand Up @@ -70,7 +74,6 @@ def assign_class_snvs(variant, mt_mat):
mt_mat[4] += 1
elif temp in ["G>T", "C>A"]:
mt_mat[5] += 1
return mt_mat


def process_vcf_file(
Expand Down Expand Up @@ -129,14 +132,14 @@ def process_vcf_file(
] += 1

# Need to check for multi-allelic records. Users shouldn't input a multi-allelic VCF file.
if get_variant_type(variant.REF, variant.ALT[0]) == "SNV":
if get_variant_type(variant.REF, variant.ALT[0]) == "snv":
infor["n_snps"] += 1
infor["mt_classes"] = assign_class_snvs(variant, infor["mt_classes"])
assign_class_snvs(variant, infor["mt_classes"])
elif get_variant_type(variant.REF, variant.ALT[0]) == "indel":
# More for indels
infor["n_indels"] += 1
infor["indels_length"].append(abs(len(variant.REF) - len(variant.ALT[0])))
elif get_variant_type(variant.REF, variant.ALT[0]) == "ONV":
elif get_variant_type(variant.REF, variant.ALT[0]) == "onv":
infor["n_onvs"] += 1
# Gathering information of variants in comparison to hard mapped regions
else:
Expand Down