Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
YichaoOU committed Jul 10, 2020
1 parent e719dcb commit a3f21d5
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 172 deletions.
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ The most easiest way to install Easy-Prime is via conda.

```
conda create -n genome_editing -c liyc1989 easy_prime
conda create -n genome_editing -c cheng_lab easy_prime
source activate genome_editing
easy_prime -h
easy_prime_vis -h
```

# Usage
Expand All @@ -46,7 +48,9 @@ easy_prime -c config.yaml -f test.vcf
```

Easy-Prime also provides a dash application.
Easy-Prime also provides a dash application. [NOT WORKING]

Please have dash installed before running the dash application.

```
Expand Down Expand Up @@ -125,9 +129,9 @@ easy_prime_vis -f topX_pegRNAs.csv -s /path/to/genome_fasta.fa

This will output pdf files to a result dir.

[version-shield]: https://img.shields.io/conda/v/liyc1989/easy_prime.svg
[version-url]: https://anaconda.org/liyc1989/easy_prime
[version-shield]: https://img.shields.io/conda/v/cheng_lab/easy_prime.svg
[version-url]: https://anaconda.org/cheng_lab/easy_prime
[python-shield]: https://img.shields.io/pypi/pyversions/easy_prime.svg
[python-url]: https://pypi.python.org/pypi/easy_prime
[platform-shield]: https://anaconda.org/liyc1989/easy_prime/badges/platforms.svg
[platform-shield]: https://anaconda.org/cheng_lab/easy_prime/badges/platforms.svg

306 changes: 153 additions & 153 deletions bin/easy_prime_vis
Original file line number Diff line number Diff line change
@@ -1,153 +1,153 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import warnings
warnings.filterwarnings("ignore")
import sys
import argparse
import datetime
import getpass
import os
from dna_features_viewer import GraphicFeature, GraphicRecord
import pandas as pd
import uuid
import subprocess
import matplotlib.pyplot as plt
npg_colors = ["#E64B35","#4DBBD5","#00A087","#3C5488","#F39B7F","#464d4f"]
my_colors = {}
my_colors['sgRNA'] = npg_colors[0]
my_colors['PBS'] = npg_colors[1]
my_colors['RTT'] = npg_colors[2]
my_colors['ngRNA'] = npg_colors[3]
my_colors['variant'] = "#e6fc3f"
"""
Output
--------
The output folder will contain:
1. all pegRNA + ngRNA combination for the input vcf file
2. top1 pegRNA + ngRNA combination for each variant
3. visualization of the top1s [TODO]
4. a summary file of each variant
"""

def my_args():
mainParser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,description="easy_prime for pegRNA design")
username = getpass.getuser()

mainParser.add_argument('-f','--rawX', help="input rawX format, last column with predicted efficiency",required=True)
mainParser.add_argument('-s','--genome_fasta', help="A YAML file specifying parameters",required=True)
mainParser.add_argument('-o','--output', help="output dir",default="easy_prime_vis_%s_%s"%(username,str(datetime.date.today())))

##------- add parameters above ---------------------
args = mainParser.parse_args()
return args

def write_file(file_name,message):
out = open(file_name,"wt")
out.write(message)
out.close()
def get_fasta_single(chr,start,end,genome_fasta=None):
out_bed = str(uuid.uuid4()).split("-")[-1]
out_fa = str(uuid.uuid4()).split("-")[-1]
write_file(out_bed,"%s\t%s\t%s"%(chr,start,end))
command = "bedtools getfasta -fi %s -bed %s -fo %s -tab"%(genome_fasta,out_bed,out_fa)
subprocess.call(command,shell=True)
lines = open(out_fa).readlines()[0]
seq = lines.split()[-1]
subprocess.call("rm %s;rm %s"%(out_bed,out_fa),shell=True)
return seq
def get_strand(x):
if x == "+":
return 1
else:
return -1

def plot_main(df,output=None,genome_fasta=None,**kwargs):
"""Given one instance of easy-prime prediction (rawX format), generate DNA visualization
Input
--------
the data frame contains 4 rows: RTT, PBS, sgRNA, ngRNA
"""
pegRNA_id = df.index.tolist()[0]
variant_id = pegRNA_id.split("_")[0]
chr = df['CHROM'][0]
start = df['start'].min()
start -= start%10
start -= 1
end = df['end'].max()
end -= end%10
end += 10

variant_pos = df.POS.min()
ref = df.REF[0]
alt = df.ALT[0]
predicted_efficiency = df.predicted_efficiency[0]*100
pos = variant_pos-start
sequence = get_fasta_single(chr,start,end,genome_fasta).upper()

feature_list = []
for s,r in df.iterrows():
r_start = r.start-start
r_end = r_start+(r.end-r.start)
r_strand = get_strand(r.strand)
gf = GraphicFeature(start=r_start, end=r_end, strand=r_strand,
color=my_colors[r.type],label=r.type)
feature_list.append(gf)
record = GraphicRecord(sequence=sequence, features=feature_list)

ax, _ = record.plot(figure_width=int(len(sequence)/5))
record.plot_sequence(ax)
ax.fill_between((pos-1.5, pos-0.5), +1000, -1000, alpha=0.5,color=my_colors['variant'])
locs, labels = plt.xticks()
new_labels = []
flag = True
for i in locs:
if flag:
new_labels.append("%s %s"%(chr,int(start+i+1)))
flag=False
else:
new_labels.append(int(start+i+1))
plt.xticks(locs,new_labels)
plt.title("ID: %s, CHR: %s, POS: %s, REF: %s, ALT: %s \n Predicted efficiency: %.1f"%(variant_id,chr,variant_pos,ref,alt,predicted_efficiency)+"%")
ax.figure.savefig(f'{output}/{pegRNA_id}.pdf', bbox_inches='tight')


def main():

args = my_args()
if not os.path.isfile(args.genome_fasta):
print (f"genome fasta NOT FOUND: {args.genome_fasta}")
df = pd.read_csv(args.rawX,index_col=0)
subprocess.call(f"mkdir -p {args.output}",shell=True)
for i in df.index.unique().tolist():
print (f"Processing {i}...")
plot_main(df.loc[i],**vars(args))


if __name__ == "__main__":
main()




















#!/usr/bin/env python
#-*- coding: utf-8 -*-
import warnings
warnings.filterwarnings("ignore")
import sys
import argparse
import datetime
import getpass
import os
from dna_features_viewer import GraphicFeature, GraphicRecord
import pandas as pd
import uuid
import subprocess
import matplotlib.pyplot as plt
npg_colors = ["#E64B35","#4DBBD5","#00A087","#3C5488","#F39B7F","#464d4f"]
my_colors = {}
my_colors['sgRNA'] = npg_colors[0]
my_colors['PBS'] = npg_colors[1]
my_colors['RTT'] = npg_colors[2]
my_colors['ngRNA'] = npg_colors[3]
my_colors['variant'] = "#e6fc3f"
"""
Output
--------
The output folder will contain:
1. all pegRNA + ngRNA combination for the input vcf file
2. top1 pegRNA + ngRNA combination for each variant
3. visualization of the top1s [TODO]
4. a summary file of each variant
"""

def my_args():
mainParser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,description="easy_prime for pegRNA design")
username = getpass.getuser()

mainParser.add_argument('-f','--rawX', help="input rawX format, last column with predicted efficiency",required=True)
mainParser.add_argument('-s','--genome_fasta', help="A YAML file specifying parameters",required=True)
mainParser.add_argument('-o','--output', help="output dir",default="easy_prime_vis_%s_%s"%(username,str(datetime.date.today())))

##------- add parameters above ---------------------
args = mainParser.parse_args()
return args

def write_file(file_name,message):
out = open(file_name,"wt")
out.write(message)
out.close()
def get_fasta_single(chr,start,end,genome_fasta=None):
out_bed = str(uuid.uuid4()).split("-")[-1]
out_fa = str(uuid.uuid4()).split("-")[-1]
write_file(out_bed,"%s\t%s\t%s"%(chr,start,end))
command = "bedtools getfasta -fi %s -bed %s -fo %s -tab"%(genome_fasta,out_bed,out_fa)
subprocess.call(command,shell=True)
lines = open(out_fa).readlines()[0]
seq = lines.split()[-1]
subprocess.call("rm %s;rm %s"%(out_bed,out_fa),shell=True)
return seq
def get_strand(x):
if x == "+":
return 1
else:
return -1

def plot_main(df,output=None,genome_fasta=None,**kwargs):
"""Given one instance of easy-prime prediction (rawX format), generate DNA visualization
Input
--------
the data frame contains 4 rows: RTT, PBS, sgRNA, ngRNA
"""
pegRNA_id = df.index.tolist()[0]
variant_id = pegRNA_id.split("_")[0]
chr = df['CHROM'][0]
start = df['start'].min()
start -= start%10
start -= 1
end = df['end'].max()
end -= end%10
end += 10

variant_pos = df.POS.min()
ref = df.REF[0]
alt = df.ALT[0]
predicted_efficiency = df.predicted_efficiency[0]*100
pos = variant_pos-start
sequence = get_fasta_single(chr,start,end,genome_fasta).upper()

feature_list = []
for s,r in df.iterrows():
r_start = r.start-start
r_end = r_start+(r.end-r.start)
r_strand = get_strand(r.strand)
gf = GraphicFeature(start=r_start, end=r_end, strand=r_strand,
color=my_colors[r.type],label=r.type)
feature_list.append(gf)
record = GraphicRecord(sequence=sequence, features=feature_list)

ax, _ = record.plot(figure_width=int(len(sequence)/5))
record.plot_sequence(ax)
ax.fill_between((pos-1.5, pos-0.5), +1000, -1000, alpha=0.5,color=my_colors['variant'])
locs, labels = plt.xticks()
new_labels = []
flag = True
for i in locs:
if flag:
new_labels.append("%s %s"%(chr,int(start+i+1)))
flag=False
else:
new_labels.append(int(start+i+1))
plt.xticks(locs,new_labels)
plt.title("ID: %s, CHR: %s, POS: %s, REF: %s, ALT: %s \n Predicted efficiency: %.1f"%(variant_id,chr,variant_pos,ref,alt,predicted_efficiency)+"%")
ax.figure.savefig(f'{output}/{pegRNA_id}.pdf', bbox_inches='tight')


def main():

args = my_args()
if not os.path.isfile(args.genome_fasta):
print (f"genome fasta NOT FOUND: {args.genome_fasta}")
df = pd.read_csv(args.rawX,index_col=0)
subprocess.call(f"mkdir -p {args.output}",shell=True)
for i in df.index.unique().tolist():
print (f"Processing {i}...")
plot_main(df.loc[i],**vars(args))


if __name__ == "__main__":
main()





















11 changes: 7 additions & 4 deletions conda_build/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{% set name = "easy_prime" %}
{% set version = "1.1.2" %}
{% set version = "1.1.3" %}

package:
name: "{{ name|lower }}"
version: "{{ version }}"

source:
url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz
sha256: d97c2f928ff8ef1bc0504706df6d515d2e40d58738310bfa0a7fceb7ea1a5bbb
sha256: 5a76ef10289cbffdb8bfffa43b396e46108be820fe5e4e03308efae269b37f92

build:
number: 0
Expand All @@ -30,9 +30,12 @@ requirements:
- scikit-bio
- biopython
- dna_features_viewer
- dash
- dash-bio
- dash-core-components
- jupyter_dashboards
- plotly



test:
imports:
- easy_prime
Expand Down
2 changes: 1 addition & 1 deletion easy_prime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
print (e)

__all__ = ["target_mutation"]
__version__ = "1.1.2"
__version__ = "1.1.3"
Loading

0 comments on commit a3f21d5

Please sign in to comment.