-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
234 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Builds the TRate binary from TR.v1.cpp and makes the TRate.sh wrapper
# executable.
CXX = g++
CXXFLAGS= -Wformat -O3

# 'clean' names an action, not a file, so it must run even if a file called
# 'clean' happens to exist.
.PHONY: clean

# Listing the source as a prerequisite makes 'make' rebuild the binary
# whenever TR.v1.cpp changes (without it, an existing TRate is never rebuilt).
TRate: TR.v1.cpp
	chmod +x TRate.sh
	$(CXX) -o $@ $(CXXFLAGS) $<

# Remove the compiled binary.
clean:
	rm -f TRate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# _TRate_ | ||
The _TRate_ program computes the "rate" of each transcript from a given coverage file. Transcripts are described by the coordinates of their exons (BED file for now; GTF/GFF in the future). The rate is computed as the total _mass_ of the exons within the transcript divided by the total _length_ of the exons. _Mass_ is an approximation of the area under the coverage curve, i.e. the sum of the areas of the coverage rectangles, and _length_ is computed as the sum of the lengths of the bedgraph intervals that fall within the exons. | ||
|
||
The _TRate_ program takes in two arguments in fixed order. | ||
|
||
1. Exons_file - coordinate sorted bed file that provides locations of exons for the corresponding transcript provided in column 4. | ||
|
||
Exons_file format example | ||
|
||
C0000570 10420 10640 Transcript1 | ||
C0000570 128078 128167 Transcript2 | ||
C0000570 128290 128405 Transcript2 | ||
C0000571 72845 73133 Transcript3 | ||
C0000571 73211 73274 Transcript3 | ||
|
||
2. Coverage_file - coordinate sorted file in bedgraph format - it can contain coverage data (usually normalized) from RNAseq study, ChIPseq, ATACseq and so on. | ||
|
||
Coverage_file format example | ||
|
||
C0000570 10481 10549 0.310587 | ||
C0000570 10579 10610 0.41057 | ||
C0000570 128288 128293 1.105 | ||
|
||
## _USAGE_ | ||
### Prerequisites (MUST be in your PATH) | ||
BEDTOOLS | ||
AWK | ||
g++ | ||
|
||
### Installation | ||
Download TRate | ||
|
||
cd TRate | ||
|
||
make | ||
|
||
In the file TRate.sh, edit the path to the TRate folder, e.g. | ||
|
||
FOLDER_PATH="your/path/TRate" | ||
|
||
### Run TRate on test data | ||
|
||
./TRate.sh ./data/Exons_file.sbed ./data/Coverage_file.bg | ||
|
||
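Output will be written to the current directory, in a file named after the coverage file with `.rate` appended (Coverage_file.bg.rate for the test data). | ||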
|
||
Output format | ||
|
||
Transcript1 0.341895 | ||
Transcript2 1.98961 | ||
Transcript3 -1 | ||
|
||
Transcript rate = -1 if no coverage data were found for this transcript. | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
|
||
/* Changed command line processing interface compared to v2 */ | ||
|
||
#include <unistd.h> | ||
#include <stdio.h> | ||
|
||
#include <fstream> | ||
#include <string> | ||
#include <string.h> | ||
#include <iostream> | ||
#include <sstream> | ||
#include <stdlib.h> | ||
|
||
using namespace std; | ||
|
||
/*
 * Writes one "<transcript>\t<rate>" record to `out`.
 * The rate is area_sum / width_sum, or -1 when width_sum is not positive
 * (i.e. no coverage was found for the transcript).
 */
static void write_rate( std::ostream& out, const std::string& name,
                        double area_sum, double width_sum ) {
    const double rate = (width_sum > 0) ? area_sum / width_sum : -1;
    out << name << "\t" << rate << '\n';
}

/*
 * Reads a tab-separated file whose lines are "<transcript>\t<area>\t<width>",
 * with all lines of one transcript adjacent, and writes one
 * "<transcript>\t<rate>" line per transcript to "<input name>te"
 * (so an input named "X.ra" produces "X.rate").
 *
 * Exits with EXIT_FAILURE when the input argument is missing or when the
 * input/output file cannot be opened or written.
 */
int main( int argc , char** argv ) {

    // getopt() is never called here; optind is only used as the index of the
    // first command-line argument.
    if (optind >= argc) {
        fprintf(stderr, "Expected *.area.bed file\n");
        fprintf(stderr, "Usage: *.area.bed\n");
        exit(EXIT_FAILURE);
    }

    // std::string instead of fixed char[1000] buffers: a long path can no
    // longer overflow the file-name storage.
    const std::string in_name = argv[optind];

    std::ifstream area_file( in_name.c_str() );
    if (!area_file) {
        fprintf(stderr, "Can't open input file %s\n", in_name.c_str());
        exit(EXIT_FAILURE);
    }

    const std::string out_name = in_name + "te";
    std::ofstream outfile( out_name.c_str() );
    if (!outfile) {
        fprintf(stderr, "Can't open output file %s\n", out_name.c_str());
        exit(EXIT_FAILURE);
    }
    printf("out=%s", out_name.c_str());

    std::string line, trans, trans_pred, areas, widths;
    // Accumulate in double: float sums lose precision on transcripts with
    // many coverage intervals. Output still uses the stream's default
    // 6 significant digits.
    double area_sum = 0, width_sum = 0;
    bool have_group = false;   // true once at least one valid record was read

    while ( std::getline( area_file , line ) ) {
        std::istringstream ss(line);

        // Skip blank or truncated lines. A failed getline leaves its target
        // string untouched, so without this check the previous line's area
        // and width would be silently counted again.
        if ( !std::getline(ss, trans, '\t') || trans.empty()
             || !std::getline(ss, areas, '\t')
             || !std::getline(ss, widths, '\t') ) {
            continue;
        }

        const double area  = atof(areas.c_str());
        const double width = atof(widths.c_str());

        if (have_group && trans == trans_pred) {
            // Same transcript as the previous record: keep accumulating.
            area_sum  += area;
            width_sum += width;
        } else {
            // Transcript changed: flush the finished one, start a new sum.
            if (have_group) write_rate(outfile, trans_pred, area_sum, width_sum);
            area_sum   = area;
            width_sum  = width;
            trans_pred = trans;
            have_group = true;
        }
    }

    // Flush the last transcript; nothing is written for an empty input.
    if (have_group) write_rate(outfile, trans_pred, area_sum, width_sum);

    outfile.close();
    if (outfile.fail()) {
        fprintf(stderr, "Error writing output file %s\n", out_name.c_str());
        exit(EXIT_FAILURE);
    }
    area_file.close();
    std::cout << std::endl << "END";
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#!/bin/bash

# Transcript_rate version 1
#
# Usage: TRate.sh <Exons_file> <Coverage_file>
#
# Bedtools should be installed in your PATH!
#
# This program takes in two arguments in fixed order:
#   $1 Exons file - bed file sorted by coordinates, with the location of exons
#      belonging to the transcript provided in column 4
#      format example
#        C0000568 109816 110194 AMEX60DDU001000001.1.bed
#        C0000570 39532 39624 AMEX60DDU001000002.1.bed
#        C0000570 39699 39934 AMEX60DDU001000002.1.bed
#   $2 Coverage file in sorted bedgraph format - it can contain coverage data
#      from RNAseq study, ChIPseq, ATACseq and so on
#      format example
#        C0000568 129865 129896 0.073524
#        C0000568 129896 129965 0.036762
#        C0000570 128344 128444 0.036762
#
# The program outputs the "rate" of each transcript in the exons file. Rate is
# computed as total mass of exons within the transcript divided by total length
# of these exons; mass is taken as approximation of the area under the coverage
# curve, i.e. sum of areas of coverage rectangles.
#
# Files written to the current directory (NAME = basename of the coverage file):
#   NAME.rate      - result, one "transcript<TAB>rate" line per transcript
#   NAME.rate.log  - standard output of the TRate binary
#   NAME.ra        - intermediate file, removed at the end

# Folder that contains the compiled TRate binary. Edit the default below, or
# export FOLDER_PATH before running the script.
FOLDER_PATH="${FOLDER_PATH:-/home/newnataliya/From_timnatuk/APPS/APPS_NT/TRate}"

Exons_file=$1
Cov_file=$2

# Exit with a non-zero status on errors so callers can detect the failure
# (a bare "exit" would return the status of the preceding echo, i.e. 0).
if [ ! -s "$Exons_file" ]; then
    echo "Exons file $Exons_file is empty or doesn't exist, exit now" >&2
    exit 1
fi

if [ ! -s "$Cov_file" ]; then
    echo "Coverage file $Cov_file is empty or doesn't exist, exit now" >&2
    exit 1
fi

Name1=$(basename "$Cov_file")

# Make the pipeline fail if any stage fails, not only the final sort.
set -o pipefail

# The exons file is passed to bedtools directly. A cached copy that was only
# created when missing and never removed would make a later run with a
# different exons file silently reuse stale exons.
#
# With -wao each output line is: 4 exon columns, 4 coverage columns, overlap.
# awk emits "transcript<TAB>coverage*overlap<TAB>overlap"; sort groups the
# lines of each transcript together, which the TRate binary relies on.
if ! bedtools intersect -sorted -wao -a "$Exons_file" -b stdin < "$Cov_file" \
        | awk '{print $4"\t"$8*$9"\t"$9}' \
        | sort -k1,1 > "$Name1.ra"; then
    echo "Intersecting $Exons_file with $Cov_file failed, exit now" >&2
    rm -f "$Name1.ra"
    exit 1
fi

# TRate writes its result next to its input, appending "te" to the name
# (NAME.ra -> NAME.rate).
if ! "$FOLDER_PATH/TRate" "$Name1.ra" > "$Name1.rate.log"; then
    echo "$FOLDER_PATH/TRate failed on $Name1.ra, exit now" >&2
    rm -f "$Name1.ra"
    exit 1
fi

echo "File $Cov_file processed, result is in $Name1.rate"
echo "Removing intermediate files"
rm -f "$Name1.ra"
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
C0000570 10481 10549 0.310587 | ||
C0000570 10579 10610 0.41057 | ||
C0000570 128288 128293 1.105 | ||
C0000570 128293 128294 2.031761 | ||
C0000570 128294 128306 2.042348 | ||
C0000570 128306 128316 2.352935 | ||
C0000570 128316 128317 2.074109 | ||
C0000570 128317 128322 2.084696 | ||
C0000570 128322 128353 2.095283 | ||
C0000570 128353 128388 2.10587 | ||
C0000570 128388 128393 2.0983 | ||
C0000570 128393 128394 2.074109 | ||
C0000570 128394 128406 1.063522 | ||
C0000570 128406 128416 1.052935 | ||
C0000570 128416 128417 1.031761 | ||
C0000570 128417 128420 1.021174 | ||
C0000570 128420 128452 1.010587 | ||
C0000571 33336 33436 5.021174 | ||
C0000571 33437 33440 5.1234 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
C0000570 10420 10640 Transcript1 | ||
C0000570 128078 128167 Transcript2 | ||
C0000570 128290 128405 Transcript2 | ||
C0000571 72845 73133 Transcript3 | ||
C0000571 73211 73274 Transcript3 |