Skip to content

Commit

Permalink
support info_annotations for BED variant files
Browse files Browse the repository at this point in the history
  • Loading branch information
slulla committed Sep 18, 2023
1 parent 45a1c80 commit aac4a6d
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Tracks are specified as GTF or BED and are annotated with name, coordinates, and

Coordinate-based information can be provided as CSV/TSV (point-based) or BEDGRAPH (interval-based). The user can customize the y axis with tick precision and scientific notation. The user can specify the line color, alpha value, and choose whether to fill in the area under the curve.

Headers are not supported for BED files. BED files must have at least three columns, which should be `chrom`, `start`, and `end`.
If a BED file contains a header line, that line must begin with `#`. BED files must have at least three columns, which should be `chrom`, `start`, and `end`.

Plots can be output as HTML, PNG, or SVG.

Expand Down Expand Up @@ -76,7 +76,7 @@ https://home.chpc.utah.edu/~u6038618/transcriptionary/plot.html
- `format`: file format of variant file; 'vcf'
- `filepath`: path to VCF
- `chrom`: chromosome
- `info_annotations`: VCF only; INFO fields to add to hover boxes
- `info_annotations`: for VCF, INFO fields to add to hover boxes
- `<info_field_1>`
- `vep`: VCF only; leave empty if not VEP annotated
- `field_name`: name of INFO field with VEP string (e.g. vep, ann, csq)
Expand All @@ -89,12 +89,14 @@ https://home.chpc.utah.edu/~u6038618/transcriptionary/plot.html
- `MED`:
- `HIGH`:

`#BED (no header)`
`#BED (if header in file, it must start with #)`
`<variant_set>`: variant set label
- `format`: file format of variant file; 'bed'
- `filepath`: path to BED file
- `chrom`: chromosome
- `header`: BED only; list of column names in BED file
- `info_annotations`: for BED, column names (as listed in `header`) to add to hover boxes
- `<info_field_1>`
- `color`: default lollipop color; use hex codes or predefined colors from default_colors/named_colors.yaml
- `variant_severity_colors`: specify lollipop colors by variant severity; use hex codes or predefined colors from default_colors/named_colors.yaml
- `LOW`:
Expand Down
7 changes: 5 additions & 2 deletions transcriptionary/get_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,17 @@ def get_variants_bed(bed_path):
if fi.readline().count('\t') != len(variant_params[variant_set]['header']) - 1:
print('Warning: number of fields in variant set {} header argument not equal to number of fields in file {} (check for trailing delimiters).'.format(variant_set, bed_path))

df = pd.read_csv(bed_path, names=variant_params[variant_set]['header'], sep='\t', index_col=None)
df = pd.read_csv(bed_path, names=variant_params[variant_set]['header'], sep='\t', index_col=None, keep_default_na=False, comment='#')
variant_params[variant_set]['has_yaxis_info'] = False
if variant_params[variant_set]['consequence_idx']: plot_params['add_variant_severity_checkbox'] = True

for _,row in df.iterrows():
if str(row.iloc[0]) != str(variant_params[variant_set]['chrom']): continue
di_variant = dict(pos=row.iloc[1], compact_start=-1, variant_set=variant_set, info_annotations=variant_params[variant_set]['info_annotations'], vep=variant_params[variant_set]['vep'], allele_count=0, allele_frequency=0, allele_number=0)

for info_field in variant_params[variant_set]['info_annotations']: di_variant[info_field] = row[info_field]

if variant_params[variant_set]['consequence_idx']: #0-based
plot_params['add_variant_severity_checkbox'] = True
for transcript_ID in transcript_IDs:
di_variant[transcript_ID + '_severity'] = VEP(row.iloc[variant_params[variant_set]['consequence_idx']],['Consequence']).impact_severity
else:
Expand Down
18 changes: 13 additions & 5 deletions transcriptionary/transcriptionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .get_coords import get_variants,get_line,get_track
from .colors import color_boxes
from .axes import add_user_axis,add_variant_axis
from .glyphs import add_intron_glyph, add_exon_glyph, add_variant_glyph, add_UTR_glyph, add_track_glyph, add_multi_line_glyph, flatten
from .glyphs import add_intron_glyph, add_exon_glyph, add_variant_glyph, add_UTR_glyph, add_track_glyph, add_multi_line_glyph
from .widget_callbacks import add_checkbox,add_variant_severity_checkbox,add_user_tracks_checkbox,add_user_lines_checkbox,add_smoothing_slider,add_legend,add_linear_log_scale,add_exon_zoom,add_variant_sets_checkbox
from . import project_coords
import numpy as np
Expand Down Expand Up @@ -55,7 +55,7 @@ def plot_transcript(plot_params, variant_params, user_track_params, user_line_pa
if plot_params['add_variant_axis']:
def log10(f):
    """Return the base-10 logarithm of f, or 0 when f is not positive."""
    # Non-positive values have no real logarithm; map them to 0 instead.
    if f > 0:
        return np.log10(f)
    return 0

all_vars = flatten([variant_params[var_set]['variant_ls'] for var_set in variant_params])
all_vars = sum([variant_params[var_set]['variant_ls'] for var_set in variant_params], []) #flatten list
all_vars_in_transcript = [v for v in all_vars if v['compact_pos'] >= 0] #get list of all variants in transcript to get max and min for variant axes
allele_counts = [v['allele_count'] for v in all_vars_in_transcript]
allele_frequencies = [v['allele_frequency'] for v in all_vars_in_transcript]
Expand Down Expand Up @@ -114,7 +114,7 @@ def log10(f): return np.log10(f) if f > 0 else 0
xs_ls,ys_ls = project_coords.map_line(user_lines[axis_name][line], transcript_dict['exons'], user_line_params[axis_name]['lines'][line]['chrom'])
all_xs.append(xs_ls)
all_ys.append(ys_ls)
try: y_max = max([i for s in [i for s in all_ys for i in s] for i in s]) #flatten 3D list to 1D list to take max #TODO change to flatten
try: y_max = max([i for s in [i for s in all_ys for i in s] for i in s]) #flatten 3D list to 1D list to take max
except: continue

for idx,line in enumerate(user_line_params[axis_name]['lines']):
Expand Down Expand Up @@ -188,10 +188,19 @@ def get_color(color): return color if color[0] == "#" else named_colors[color]
variant_params[variant_set]['info_annotations'] = []
except: variant_params[variant_set]['info_annotations'] = []

try: #if consequence_idx nonexistent,
int(variant_params[variant_set]['consequence_idx'])
except: variant_params[variant_set]['consequence_idx'] = False

#for BED files, replace space with underscore in annotation field names (space causes issues with hover box)
try:
variant_params[variant_set]['header'] = list(map(lambda field_name: str.replace(field_name, ' ', '_'), variant_params[variant_set]['header']))
variant_params[variant_set]['info_annotations'] = list(map(lambda field_name: str.replace(field_name, ' ', '_'), variant_params[variant_set]['info_annotations']))
except: pass

try: #if vep empty or nonexistent, set to empty params
if not variant_params[variant_set]['vep']: variant_params[variant_set]['vep'] = {'field_name': '' ,'vep_fields': [], 'annotate_severity_by': ''}
except: variant_params[variant_set]['vep'] = {'field_name': '' ,'vep_fields': [], 'annotate_severity_by': ''}
# except: variant_params[variant_set]['vep'] = {'vep_fields': []}

### TRACKS ###
for track_name in user_track_params:
Expand Down Expand Up @@ -249,7 +258,6 @@ def transcriptionary():
for line_name in user_line_params:
line_axes[line_name] = []

#for idx,ID in enumerate(transcript_IDs):
for ID in transcript_IDs:
title = 'gene={}; transcript={}/{}'.format(plot_params['gene_name'], ID, transcripts[ID]['ID'])
if transcripts[ID]['direction']: title += ' ({})'.format(transcripts[ID]['direction'])
Expand Down

0 comments on commit aac4a6d

Please sign in to comment.