diff --git a/README.md b/README.md index db652b8..9b3a75e 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Tracks are specified as GTF or BED and are annotated with name, coordinates, and Coordinate-based information can be provided as CSV/TSV (point-based) or BEDGRAPH (interval-based). The user can customize the y axis with tick precision and scientific notation. The user can specify the line color, alpha value, and choose whether to fill in the area under the curve. -Headers are not supported for BED files. BED files must have at least three columns, which should be `chrom`, `start`, and `end`. +If a BED file contains a header line, that line must begin with `#`. BED files must have at least three columns, which should be `chrom`, `start`, and `end`. Plots can be output as HTML, PNG, or SVG. @@ -76,7 +76,7 @@ https://home.chpc.utah.edu/~u6038618/transcriptionary/plot.html - `format`: file format of variant file; 'vcf' - `filepath`: path to VCF - `chrom`: chromosome -- `info_annotations`: VCF only; INFO fields to add to hover boxes +- `info_annotations`: for VCF, INFO fields to add to hover boxes - `` - `vep`: VCF only; leave empty if not VEP annotated - `field_name`: name of INFO field with VEP string (e.g. 
vep, ann, csq) @@ -89,12 +89,14 @@ https://home.chpc.utah.edu/~u6038618/transcriptionary/plot.html - `MED`: - `HIGH`: -`#BED (no header)` +`#BED (if header in file, it must start with #)` ``: variant set label - `format`: file format of variant file; 'vcf' - `filepath`: path to VCF - `chrom`: chromosome - `header`: BED only; list of column names in BED file +- `info_annotations`: for BED, column names to add to hover boxes + - `` - `color`: default lollipop color; use hex codes or predefined colors from default_colors/named_colors.yaml - `variant_severity_colors`: specify lollipop colors by variant severity; use hex codes or predefined colors from default_colors/named_colors.yaml - `LOW`: diff --git a/transcriptionary/get_coords.py b/transcriptionary/get_coords.py index 8a4fea7..ec91cf8 100755 --- a/transcriptionary/get_coords.py +++ b/transcriptionary/get_coords.py @@ -88,14 +88,17 @@ def get_variants_bed(bed_path): if fi.readline().count('\t') != len(variant_params[variant_set]['header']) - 1: print('Warning: number of fields in variant set {} header argument not equal to number of fields in file {} (check for trailing delimiters).'.format(variant_set, bed_path)) - df = pd.read_csv(bed_path, names=variant_params[variant_set]['header'], sep='\t', index_col=None) + df = pd.read_csv(bed_path, names=variant_params[variant_set]['header'], sep='\t', index_col=None, keep_default_na=False, comment='#') variant_params[variant_set]['has_yaxis_info'] = False + if variant_params[variant_set]['consequence_idx']: plot_params['add_variant_severity_checkbox'] = True for _,row in df.iterrows(): if str(row.iloc[0]) != str(variant_params[variant_set]['chrom']): continue di_variant = dict(pos=row.iloc[1], compact_start=-1, variant_set=variant_set, info_annotations=variant_params[variant_set]['info_annotations'], vep=variant_params[variant_set]['vep'], allele_count=0, allele_frequency=0, allele_number=0) + + for info_field in variant_params[variant_set]['info_annotations']: 
di_variant[info_field] = row[info_field] + if variant_params[variant_set]['consequence_idx']: #0-based - plot_params['add_variant_severity_checkbox'] = True for transcript_ID in transcript_IDs: di_variant[transcript_ID + '_severity'] = VEP(row.iloc[variant_params[variant_set]['consequence_idx']],['Consequence']).impact_severity else: diff --git a/transcriptionary/transcriptionary.py b/transcriptionary/transcriptionary.py index 0249dec..6880c6a 100755 --- a/transcriptionary/transcriptionary.py +++ b/transcriptionary/transcriptionary.py @@ -2,7 +2,7 @@ from .get_coords import get_variants,get_line,get_track from .colors import color_boxes from .axes import add_user_axis,add_variant_axis -from .glyphs import add_intron_glyph, add_exon_glyph, add_variant_glyph, add_UTR_glyph, add_track_glyph, add_multi_line_glyph, flatten +from .glyphs import add_intron_glyph, add_exon_glyph, add_variant_glyph, add_UTR_glyph, add_track_glyph, add_multi_line_glyph from .widget_callbacks import add_checkbox,add_variant_severity_checkbox,add_user_tracks_checkbox,add_user_lines_checkbox,add_smoothing_slider,add_legend,add_linear_log_scale,add_exon_zoom,add_variant_sets_checkbox from . 
import project_coords import numpy as np @@ -55,7 +55,7 @@ def plot_transcript(plot_params, variant_params, user_track_params, user_line_pa if plot_params['add_variant_axis']: def log10(f): return np.log10(f) if f > 0 else 0 - all_vars = flatten([variant_params[var_set]['variant_ls'] for var_set in variant_params]) + all_vars = sum([variant_params[var_set]['variant_ls'] for var_set in variant_params], []) #flatten list all_vars_in_transcript = [v for v in all_vars if v['compact_pos'] >= 0] #get list of all variants in transcript to get max and min for variant axes allele_counts = [v['allele_count'] for v in all_vars_in_transcript] allele_frequencies = [v['allele_frequency'] for v in all_vars_in_transcript] @@ -114,7 +114,7 @@ def log10(f): return np.log10(f) if f > 0 else 0 xs_ls,ys_ls = project_coords.map_line(user_lines[axis_name][line], transcript_dict['exons'], user_line_params[axis_name]['lines'][line]['chrom']) all_xs.append(xs_ls) all_ys.append(ys_ls) - try: y_max = max([i for s in [i for s in all_ys for i in s] for i in s]) #flatten 3D list to 1D list to take max #TODO change to flatten + try: y_max = max([i for s in [i for s in all_ys for i in s] for i in s]) #flatten 3D list to 1D list to take max except: continue for idx,line in enumerate(user_line_params[axis_name]['lines']): @@ -188,10 +188,19 @@ def get_color(color): return color if color[0] == "#" else named_colors[color] variant_params[variant_set]['info_annotations'] = [] except: variant_params[variant_set]['info_annotations'] = [] + try: #if consequence_idx nonexistent, + int(variant_params[variant_set]['consequence_idx']) + except: variant_params[variant_set]['consequence_idx'] = False + + #for BED files, replace space with underscore in annotation field names (space causes issues with hover box) + try: + variant_params[variant_set]['header'] = list(map(lambda field_name: str.replace(field_name, ' ', '_'), variant_params[variant_set]['header'])) + variant_params[variant_set]['info_annotations'] = 
list(map(lambda field_name: str.replace(field_name, ' ', '_'), variant_params[variant_set]['info_annotations'])) + except: pass + try: #if vep empty or nonexistent, set to empty params if not variant_params[variant_set]['vep']: variant_params[variant_set]['vep'] = {'field_name': '' ,'vep_fields': [], 'annotate_severity_by': ''} except: variant_params[variant_set]['vep'] = {'field_name': '' ,'vep_fields': [], 'annotate_severity_by': ''} - # except: variant_params[variant_set]['vep'] = {'vep_fields': []} ### TRACKS ### for track_name in user_track_params: @@ -249,7 +258,6 @@ def transcriptionary(): for line_name in user_line_params: line_axes[line_name] = [] - #for idx,ID in enumerate(transcript_IDs): for ID in transcript_IDs: title = 'gene={}; transcript={}/{}'.format(plot_params['gene_name'], ID, transcripts[ID]['ID']) if transcripts[ID]['direction']: title += ' ({})'.format(transcripts[ID]['direction'])