Skip to content

Commit a2bb642

Browse files
Merge pull request #18 from sanogenetics/hotfix/haploid-vcf
handle haploid vcf rows
2 parents 4e24ead + bb4ffea commit a2bb642

File tree

1 file changed

+23
-17
lines changed

1 file changed

+23
-17
lines changed

src/snps/io/reader.py

+23-17
Original file line numberDiff line numberDiff line change
@@ -1119,29 +1119,35 @@ def _parse_vcf(self, buffer, rsids):
11191119
if len(alt.split(",")) > 1 and alt.split(",")[1] == "<NON_REF>":
11201120
alt = alt.split(",")[0]
11211121

1122-
zygote = line_split[9]
1123-
zygote = zygote.split(":")[0]
1124-
11251122
ref_alt = [ref] + alt.split(",")
11261123

11271124
# skip insertions and deletions
11281125
if sum(map(len, ref_alt)) > len(ref_alt):
11291126
continue
11301127

1131-
zygote1, zygote2 = zygote.replace("|", " ").replace("/", " ").split(" ")
1132-
if zygote1 == "." or zygote2 == ".":
1133-
# assign null genotypes if either allele is None
1134-
genotype = np.nan
1135-
elif (zygote1 == "0" or zygote2 == "0") and ref == ".":
1136-
# sample allele specifies REF allele, which is None
1137-
genotype = np.nan
1138-
elif (zygote1 == "1" or zygote2 == "1") and alt == ".":
1139-
# sample allele specifies ALT allele, which is None
1140-
genotype = np.nan
1141-
else:
1142-
# Could capture full genotype, if REF is None, but genotype is 1/1 or
1143-
# if ALT is None, but genotype is 0/0
1144-
genotype = ref_alt[int(zygote1)] + ref_alt[int(zygote2)]
1128+
# GT (genotype) is usually the first sample-specific field
1129+
# | = diploid phased
1130+
# / = diploid unphased
1131+
# or haploid e.g. male sex chromosome
1132+
genotype = ""
1133+
zygote = line_split[9]
1134+
zygote = zygote.split(":")[0]
1135+
for z in zygote.replace("|", "/").split("/"):
1136+
if z == ".":
1137+
# missing genotype
1138+
genotype = np.nan
1139+
break
1140+
z = int(z)
1141+
if z > len(ref_alt):
1142+
# invalid genotype number
1143+
genotype = np.nan
1144+
break
1145+
elif ref_alt[z] == ".":
1146+
# missing genotype in ref or alt
1147+
genotype = np.nan
1148+
break
1149+
else:
1150+
genotype = genotype + ref_alt[z]
11451151

11461152
if "/" in zygote and pd.notna(genotype):
11471153
phased = False

0 commit comments

Comments
 (0)