Skip to content

Commit b71f976

Browse files
add: implement index by column name
1 parent 1f82866 commit b71f976

File tree

4 files changed

+23
-16
lines changed

4 files changed

+23
-16
lines changed

containers_build/boostdm/features/aachange.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010
def get_aachange(chr_, pos, alt, gene, reader):
1111

1212
for data in reader.get(chr_, pos, pos):
13-
alt_vep = (data[3] == alt)
14-
mane_vep = (data[-5] != '-') # impose MANE transcript
15-
correct_gene = (data[-9] == gene) # skip cases with antisense overlapping gene (gene is gene_symbol)
13+
alt_vep = (data['ALT'] == alt)
14+
mane_vep = (data['MANE_SELECT'] != '-') # impose MANE transcript
15+
correct_gene = (data['SYMBOL'] == gene) # skip cases with antisense overlapping gene (gene is gene_symbol)
1616
if alt_vep and mane_vep and correct_gene:
17-
aas = data[11] # [11] -> amino-acids involved in change ("I/T")
18-
aa_pos = data[10] # [10] -> amino-acid position
17+
aas = data['AA'] # amino acids involved in the change, e.g. "I/T" (formerly positional index 11)
18+
aa_pos = data['PROT_POS'] # amino-acid position (formerly positional index 10)
1919
if '/' in aas:
2020
aa_ref, aa_alt = tuple(aas.split('/'))
2121
return aa_ref + aa_pos + aa_alt

containers_build/boostdm/features/consequence_type.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ def get_csqn_type(chr_, pos, alt, gene, reader):
99

1010
for data in reader.get(chr_, pos, pos):
1111

12-
alt_vep = (data[3] == alt) # same alternate allele
13-
mane_vep = (data[-5] != '-') # impose mane transcript
14-
correct_gene = (data[-9] == gene) # skip cases with antisense overlapping genes
12+
alt_vep = (data['ALT'] == alt) # same alternate allele
13+
mane_vep = (data["MANE_SELECT"] != '-') # impose mane transcript
14+
correct_gene = (data["SYMBOL"] == gene) # skip cases with antisense overlapping genes
1515
if alt_vep and mane_vep and correct_gene:
16-
csqn = CONSEQUENCES_LIST[min([CONSEQUENCES_DICT[c] for c in data[7].split(',')])]
16+
csqn = CONSEQUENCES_LIST[min([CONSEQUENCES_DICT[c] for c in data["CNSQ"].split(',')])]
1717
return AGGREGATION_DICT.get(csqn, None)
1818

1919
return None
@@ -23,5 +23,5 @@ def add_feature(df):
2323

2424
with Tabix(TABIX_FILE) as reader:
2525
get_from_reader = partial(get_csqn_type, reader=reader)
26-
df['csqn_type'] = df.apply(lambda row: get_from_reader(str(row['chr']), int(row['pos']), row['alt'], row['gene']), axis=1)
26+
# Pass chr, pos, alt and gene as four separate positional arguments:
# get_csqn_type(chr_, pos, alt, gene, reader) would raise a TypeError if they
# were wrapped into a single tuple.
df['csqn_type'] = df.apply(lambda row: get_from_reader(str(row['chr']), int(row['pos']), row['alt'], row['gene']), axis=1)
2727
return df

containers_build/boostdm/features/exon.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ def nmd_rule(exon, total_exons):
2626
def get_exon(chr_, pos, alt,gene, reader):
2727

2828
for data in reader.get(chr_, pos, pos):
29-
alt_vep = (data[3] == alt)
30-
mane_vep = (data[-5] != '-') # impose mane transcript
31-
correct_gene = (data[-9] == gene) # skip cases with antisense overlapping gene
29+
alt_vep = (data["ALT"] == alt)
30+
mane_vep = (data["MANE_SELECT"] != '-') # impose mane transcript
31+
correct_gene = (data["SYMBOL"] == gene) # skip cases with antisense overlapping gene
3232
if alt_vep and mane_vep and correct_gene:
33-
exons = data[-2]
33+
exons = data["EXON"]
3434
if '/' in exons:
3535
exon, total_exons = tuple(exons.split('/'))
3636
else:

containers_build/boostdm/vepreader.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
GENOME_SEQUENCE_MAPS.update({'chrX': 'X', '23': 'X', 'chr23': 'X', 'chrY': 'Y', '24': 'Y', 'chr24': 'Y'})
66
GENOME_SEQUENCE_MAPS.update({'chrM': 'M', 'MT': 'M', 'chrMT': 'M'})
77

8+
HEADER = [
9+
'CHR', 'POS', 'REF', 'ALT', 'GENE','ENST','TYPE','CNSQ','cDNA_POS',
10+
'CDS_POS', 'PROT_POS','AA','CODONS','EXISTING_VARIATION','IMPACT','DISTANCE','STRAND','FLAGS','SYMBOL',
11+
'SYMBOL_SOURCE','HGNC_ID','CANONICAL','MANE_SELECT','MANE_PLUS_CLINICAL','ENSP','EXON','INTRON'
12+
]
13+
814

915
class Tabix:
1016

@@ -22,6 +28,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
2228

2329
def get(self, chromosome, start, stop):
2430
chr_ = self.map.get(chromosome, chromosome)
25-
for row in self.tb.query("{}".format(chr_), start, stop):
26-
yield row
31+
for row in self.tb.query('{}'.format(chr_), start, stop):
32+
row_dict = dict(zip(HEADER, row))
33+
yield row_dict
2734

0 commit comments

Comments
 (0)