Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
timnat authored Jun 29, 2023
1 parent dc53505 commit 8603745
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 0 deletions.
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Makes the TRate.sh wrapper script executable and compiles the C++ code
# into the TRate binary.
CXX = g++
CXXFLAGS = -Wformat -O3

# The source file is listed as a prerequisite so that make rebuilds the
# binary whenever TR.v1.cpp changes.
TRate: TR.v1.cpp
	chmod +x TRate.sh
	$(CXX) -o $@ $(CXXFLAGS) TR.v1.cpp

# Remove the compiled binary.
clean:
	rm -f TRate

# clean does not produce a file named "clean".
.PHONY: clean
57 changes: 57 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# _TRate_
The _TRate_ program computes the "rate" of each transcript from a given coverage file. Transcripts are described by the coordinates of their exons (a bed file for now; gtf/gff in the future). The rate is computed as the total _mass_ of the exons within the transcript divided by their total _length_. _Mass_ approximates the area under the coverage curve, i.e. it is the sum of the areas of the coverage rectangles (coverage value × overlap length), and _length_ is the sum of the lengths of the bedgraph intervals that fall within the exons.

The _TRate_ program takes in two arguments in fixed order.

1. Exons_file - a coordinate-sorted bed file that gives the location of each exon, with the name of the transcript the exon belongs to in column 4.

Exons_file format example

C0000570 10420 10640 Transcript1
C0000570 128078 128167 Transcript2
C0000570 128290 128405 Transcript2
C0000571 72845 73133 Transcript3
C0000571 73211 73274 Transcript3

2. Coverage_file - coordinate sorted file in bedgraph format - it can contain coverage data (usually normalized) from RNAseq study, ChIPseq, ATACseq and so on.

Coverage_file format example

C0000570 10481 10549 0.310587
C0000570 10579 10610 0.41057
C0000570 128288 128293 1.105

## _USAGE_
### Prerequisites (MUST be in your PATH)
BEDTOOLS
AWK
g++

### Installation
Download TRate

cd TRate

make

In the file TRate.sh, set FOLDER_PATH to the path of your TRate folder, e.g.

FOLDER_PATH="your/path/TRate"

### Run TRate on test data

./TRate.sh ./data/Exons_file.sbed ./data/Coverage_file.bg

Output will be written to the current directory, in a file named after the coverage file with ".rate" appended (here Coverage_file.bg.rate).

Output format

Transcript1 0.341895
Transcript2 1.98961
Transcript3 -1

Transcript rate = -1 if no coverage data were found for this transcript.




88 changes: 88 additions & 0 deletions TR.v1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@

/* Changed command line processing interface compared to v2 */

#include <unistd.h>
#include <stdio.h>

#include <fstream>
#include <string>
#include <string.h>
#include <iostream>
#include <sstream>
#include <stdlib.h>

using namespace std;

// Writes one "<transcript>\t<rate>" line to out. The rate is
// area_sum / width_sum, or -1 when no positive width was accumulated.
static void write_rate(std::ostream& out, const std::string& transcript,
                       float area_sum, float width_sum) {
    const float rate = (width_sum > 0) ? area_sum / width_sum : -1;
    out << transcript << "\t" << rate << '\n';
}

/*
 * Computes a rate for every transcript listed in a tab-separated input file.
 *
 * Input  (first non-option argument): lines of "<transcript>\t<area>\t<width>".
 *        Lines belonging to the same transcript must be consecutive.
 * Output: a file named "<input name>te" with one "<transcript>\t<rate>" line
 *        per transcript, where rate = sum(area) / sum(width), or -1 when the
 *        summed width is not positive.
 *
 * Exits with EXIT_FAILURE when the argument is missing or when the input or
 * output file cannot be opened or written.
 */
int main( int argc , char** argv ) {

    // getopt() is never called, so optind still holds its initial value and
    // argv[optind] is the first command line argument.
    if (optind >= argc) {
        fprintf(stderr, "Expected *.area.bed file\n");
        fprintf(stderr, "Usage: *.area.bed\n");
        exit(EXIT_FAILURE);
    }

    // std::string avoids the fixed-size buffers, which overflowed on long paths.
    const std::string in_name = argv[optind];
    const std::string out_name = in_name + "te";

    std::ifstream area_file(in_name.c_str());
    if (!area_file) {
        fprintf(stderr, "Can't open input file %s\n", in_name.c_str());
        exit(EXIT_FAILURE);
    }

    std::ofstream outfile(out_name.c_str());
    if (!outfile) {
        fprintf(stderr, "Can't open output file %s\n", out_name.c_str());
        exit(EXIT_FAILURE);
    }
    printf("out=%s", out_name.c_str());

    std::string line;
    std::string current;               // transcript currently being accumulated
    float area_sum = 0, width_sum = 0;

    while (std::getline(area_file, line)) {
        if (line.empty()) continue;    // blank lines carry no record

        // The field strings are local to the iteration so that a line with
        // missing columns yields 0 instead of reusing the previous line's values.
        std::stringstream ss(line);
        std::string trans, areas, widths;
        std::getline(ss, trans, '\t');
        std::getline(ss, areas, '\t');
        std::getline(ss, widths, '\t');
        const float area = atof(areas.c_str());
        const float width = atof(widths.c_str());

        if (trans != current) {
            // A new transcript starts: flush the finished one, then restart the sums.
            if (!current.empty()) write_rate(outfile, current, area_sum, width_sum);
            current = trans;
            area_sum = area;
            width_sum = width;
        } else {
            area_sum += area;
            width_sum += width;
        }
    }

    // Flush the last transcript; nothing is written for an empty input file.
    if (!current.empty()) write_rate(outfile, current, area_sum, width_sum);

    outfile.close();
    if (!outfile) {
        fprintf(stderr, "Error writing output file %s\n", out_name.c_str());
        exit(EXIT_FAILURE);
    }
    area_file.close();
    std::cout << std::endl << "END";
    return 0;
}
54 changes: 54 additions & 0 deletions TRate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash

# Folder that contains the compiled TRate binary; edit to match your installation.
FOLDER_PATH="/home/newnataliya/From_timnatuk/APPS/APPS_NT/TRate"
#Transcript_rate version 1

# Bedtools should be installed in your PATH!

# This program takes in two arguments in fixed order:
#   $1 Exons file - bed file sorted by coordinates, with the location of exons
#      belonging to the transcript provided in column 4
#      format example
#        C0000568 109816 110194 AMEX60DDU001000001.1.bed
#        C0000570 39532 39624 AMEX60DDU001000002.1.bed
#        C0000570 39699 39934 AMEX60DDU001000002.1.bed
#   $2 Coverage file in sorted bedgraph format - it can contain coverage data
#      from RNAseq study, ChIPseq, ATACseq and so on
#      format example
#        C0000568 129865 129896 0.073524
#        C0000568 129896 129965 0.036762
#        C0000570 128344 128444 0.036762
# This program outputs the "rate" of each transcript in the exons file. Rate is
# computed as total mass of exons within the transcript divided by total length
# of these exons; mass is taken as approximation of the area under the coverage
# curve, i.e. sum of areas of coverage rectangles.
#
# Example arguments:
#   $1 EXONS_by_transcripts.sbed
#   $2 SRR2885267.sam.bam.norm0.036762.bg.s

# Make a failure of any stage of the pipeline below visible in its exit status.
set -o pipefail

Exons_file=$1
Cov_file=$2

if [ ! -s "$Exons_file" ]; then
    echo "Exons file $Exons_file is empty or doesn't exist, exit now" >&2
    exit 1
fi

if [ ! -s "$Cov_file" ]; then
    echo "Coverage file $Cov_file is empty or doesn't exist, exit now" >&2
    exit 1
fi

# Fall back to the folder of this script when the configured folder has no binary.
if [ ! -x "$FOLDER_PATH/TRate" ]; then
    FOLDER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
fi
if [ ! -x "$FOLDER_PATH/TRate" ]; then
    echo "TRate binary not found in $FOLDER_PATH, run make or edit FOLDER_PATH" >&2
    exit 1
fi

Name1=$(basename -- "$Cov_file")

# Intersect exons with coverage intervals and emit, per overlap,
# transcript <TAB> coverage*overlap_length <TAB> overlap_length,
# grouped by transcript name for the TRate binary.
# The exons file is read directly: a cached copy in the working directory
# would be silently reused on later runs with a different exons file.
bedtools intersect -sorted -wao -a "$Exons_file" -b stdin < "$Cov_file" \
    | awk '{print $4"\t"$8*$9"\t"$9}' \
    | sort -k1,1 > "$Name1.ra" \
    || { echo "bedtools/awk/sort pipeline failed for $Cov_file" >&2; rm -f "$Name1.ra"; exit 1; }

# TRate writes its result to "<input>te", i.e. "$Name1.rate".
"$FOLDER_PATH/TRate" "$Name1.ra" > "$Name1.rate.log" \
    || { echo "TRate failed, see $Name1.rate.log" >&2; exit 1; }

echo "File $Cov_file processed, result is in $Name1.rate"
echo "Removing intermediate files"
rm -- "$Name1.ra"


19 changes: 19 additions & 0 deletions data/Coverage_file.bg
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
C0000570 10481 10549 0.310587
C0000570 10579 10610 0.41057
C0000570 128288 128293 1.105
C0000570 128293 128294 2.031761
C0000570 128294 128306 2.042348
C0000570 128306 128316 2.352935
C0000570 128316 128317 2.074109
C0000570 128317 128322 2.084696
C0000570 128322 128353 2.095283
C0000570 128353 128388 2.10587
C0000570 128388 128393 2.0983
C0000570 128393 128394 2.074109
C0000570 128394 128406 1.063522
C0000570 128406 128416 1.052935
C0000570 128416 128417 1.031761
C0000570 128417 128420 1.021174
C0000570 128420 128452 1.010587
C0000571 33336 33436 5.021174
C0000571 33437 33440 5.1234
5 changes: 5 additions & 0 deletions data/Exons_file.sbed
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
C0000570 10420 10640 Transcript1
C0000570 128078 128167 Transcript2
C0000570 128290 128405 Transcript2
C0000571 72845 73133 Transcript3
C0000571 73211 73274 Transcript3

0 comments on commit 8603745

Please sign in to comment.