|
10 | 10 |
|
11 | 11 | from boostdm import BoostDMError
|
12 | 12 | from boostdm.annotations.utils import encode_consequence_type, rectify_synonymous, rectify_missense, rectify_splicing
|
13 |
| -from boostdm.globals import CANONICAL_TRANSCRIPTS_FILE, MNVS_FILE, COHORTS_PATH, DRIVERS_PATH |
| 13 | +from boostdm.globals import MANE_TRANSCRIPTS_FILE, MNVS_FILE, COHORTS_PATH, DRIVERS_PATH |
14 | 14 | from boostdm.oncotree import Oncotree
|
15 | 15 | from boostdm.features import phylop, consequence_type, aachange, exon, ptms, clustl, hotmaps, smregions, dndscv
|
16 | 16 | from boostdm.passengers import retrieve_exons, randomize
|
@@ -68,7 +68,7 @@ def retrieve_expectation(exp_dict, v):
|
68 | 68 | def set_string_chr(row):
|
69 | 69 | try:
|
70 | 70 | return str(int(row["chr"]))
|
71 |
| - except: |
| 71 | + except ValueError: |
72 | 72 | return str(row["chr"])
|
73 | 73 |
|
74 | 74 |
|
@@ -149,16 +149,16 @@ def mnvs_to_remove():
|
149 | 149 |
|
150 | 150 | def retrieve_transcript():
|
151 | 151 |
|
152 |
| - """Returns dataframe with canonical transcript regions""" |
| 152 | + """Returns dataframe with mane transcript regions (cds + 25bp for splicing)""" |
153 | 153 |
|
154 |
| - canonical_transcript_df = pd.read_csv(CANONICAL_TRANSCRIPTS_FILE, |
| 154 | + mane_transcript_df = pd.read_csv(MANE_TRANSCRIPTS_FILE, |
155 | 155 | sep='\t', header=None, compression='gzip', low_memory=False, skiprows=1)
|
156 | 156 |
|
157 | 157 | # TODO: verify the columns we are selecting are the right ones
|
158 | 158 |
|
159 |
| - canonical_transcript_df = canonical_transcript_df[[0, 1, 2, 6]].copy() |
160 |
| - canonical_transcript_df.columns = ['chr', 'start', 'end', 'gene'] |
161 |
| - return canonical_transcript_df |
| 159 | + mane_transcript_df = mane_transcript_df[[0, 1, 2, 6]].copy() |
| 160 | + mane_transcript_df.columns = ['chr', 'start', 'end', 'gene'] |
| 161 | + return mane_transcript_df |
162 | 162 |
|
163 | 163 |
|
164 | 164 | def intersect_region_mutations(cds, pos):
|
@@ -228,8 +228,8 @@ def initialize_trainset(df, drivers):
|
228 | 228 |
|
229 | 229 | def build_positive_set(df_expect):
|
230 | 230 |
|
231 |
| - canonical_transcript = retrieve_transcript() |
232 |
| - pos = intersect_region_mutations(canonical_transcript, df_expect) |
| 231 | + mane_transcript = retrieve_transcript() |
| 232 | + pos = intersect_region_mutations(mane_transcript, df_expect) |
233 | 233 | pos['response'] = 1
|
234 | 234 | return pos
|
235 | 235 |
|
|
0 commit comments