-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
nylander
authored and
nylander
committed
Jan 25, 2016
0 parents
commit e8f5ef0
Showing
13 changed files
with
485 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# ptemplate -- A project template | ||
|
||
Inspired by [A Quick Guide to Organizing Computational Biology Projects](http://dx.doi.org/10.1371/journal.pcbi.1000424) | ||
|
||
To initialize a new `project`, run these three steps: | ||
|
||
git clone https://github.com/nylander/ptemplate.git | ||
mv ptemplate project && cd project | ||
sh bin/init.sh | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# ptemplate -- bin | ||
|
||
**Version:** 2016-01-25 | ||
|
||
**Sign:** nylander | ||
|
||
## Description | ||
|
||
Text here. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/perl | ||
#=============================================================================== | ||
=pod | ||
=head1 | ||
FILE: fasta_unwrap.pl | ||
USAGE: ./fasta_unwrap.pl | ||
DESCRIPTION: Un-wrap sequence lines in fasta | ||
OPTIONS: --- | ||
REQUIREMENTS: --- | ||
BUGS: --- | ||
NOTES: --- | ||
AUTHOR: Johan Nylander (JN), [email protected] | ||
COMPANY: BILS/NRM | ||
VERSION: 1.0 | ||
CREATED: 01/12/2016 04:25:36 PM | ||
REVISION: --- | ||
=cut | ||
|
||
#=============================================================================== | ||
|
||
use strict;
use warnings;

# Un-wrap fasta read from the files named on the command line (or from
# STDIN): each record is written as its header line followed by a single
# line holding the whole sequence.
#
# The input is scanned line by line and a record starts only where a line
# begins with '>'. Splitting the stream on every '>' character instead would
# cut headers that contain '>' in their description and would turn any text
# (even a blank line) in front of the first header into a bogus record.
my $in_record = 0;    # true once the first header line has been seen

while ( my $line = <> ) {
    chomp $line;
    if ( $line =~ /^>/ ) {
        print "\n" if $in_record;    # terminate the previous sequence line
        print $line, "\n";
        $in_record = 1;
    }
    elsif ($in_record) {
        print $line;                 # append to the current sequence line
    }
    elsif ( $line =~ /\S/ ) {
        # Non-blank text that belongs to no record: report it, do not emit it.
        warn "Skipping text before the first fasta header (line $.)\n";
    }
}

# Terminate the sequence line of the last record.
print "\n" if $in_record;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/perl | ||
#=============================================================================== | ||
=pod | ||
=head1 | ||
FILE: fasta_wrap.pl | ||
USAGE: ./fasta_wrap.pl | ||
DESCRIPTION: Wrap sequence lines in fasta | ||
OPTIONS: --- | ||
REQUIREMENTS: --- | ||
BUGS: --- | ||
NOTES: --- | ||
AUTHOR: Johan Nylander (JN), [email protected] | ||
COMPANY: BILS/NRM | ||
VERSION: 1.0 | ||
CREATED: 01/12/2016 04:25:36 PM | ||
REVISION: --- | ||
=cut | ||
|
||
#=============================================================================== | ||
|
||
use strict;
use warnings;

my $length = 60;    # maximum number of non-blank characters per output line

# Copy the input (files named on the command line, or STDIN) to STDOUT.
# Header lines (first non-blank character is '>') are passed through
# unchanged; every other line is broken after each run of $length non-blank
# characters. Input lines are wrapped one by one and are never joined, so
# un-wrap the sequences first if the input is already wrapped at a different
# width.
while ( my $line = <> ) {
    chomp $line;
    if ( $line !~ /^\s*>/ ) {

        # (?!\z): do not insert a break after the final chunk. Without it a
        # line whose length is an exact multiple of $length would get a
        # newline here plus the one printed below, i.e. a spurious blank line.
        $line =~ s/(\S{$length})(?!\z)/$1\n/g;
    }
    print $line, "\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
#!/usr/bin/perl | ||
|
||
# Usage: get_fasta_info.pl infile.fa | ||
# | ||
|
||
use strict; | ||
use Data::Dumper; | ||
|
||
my ( | ||
$my_name, $Total_Contigs, $Total_XContigs, $Total_Bases, | ||
$Total_XBases, $Total_Min, $Total_Max, $Unique_Counts | ||
); | ||
|
||
#=== FUNCTION ================================================================
# NAME: read_file
# VERSION: 02/09/2012 12:11:04 PM
# DESCRIPTION: Read one fasta file (plain, or compressed as .gz, .zip, .Z or
#              .bz2), collect the per-file sequence statistics, print them
#              with print_counts() and add them to the $Total_* globals.
# PARAMETERS: $file_name - path of the fasta file; '-' reads standard input
# RETURNS: nothing
# TODO: ???
#===============================================================================
sub read_file {
    my ($file_name) = @_;

    # Prefix for error messages; fall back to the script name when the
    # file-level $my_name has not been set.
    my $prog = defined $my_name ? $my_name : $0;

    ########## open file ##########
    ## check if compressed. Warning, does not handle tar archives (*.tar.gz, *.tar.bz2, *.tgz, etc)
    my $decompressor =
        $file_name =~ /\.(?:gz|zip|Z)$/ ? 'gzip -dc'
      : $file_name =~ /\.bz2$/          ? 'bzip2 -dc'
      :                                   '';

    my $fasta_in;
    if ($decompressor) {

        # The decompressor reads the file on its standard input, so a shell
        # is needed for the redirection. Single-quote the name (POSIX shell
        # quoting) so that spaces and shell metacharacters in it are taken
        # literally. $file_name itself is left untouched so that the report
        # shows the real file name rather than the command line.
        ( my $quoted = $file_name ) =~ s/'/'\\''/g;
        open( $fasta_in, '-|', "$decompressor < '$quoted'" )
          or die "Can't open $file_name : $!";
    }
    elsif ( $file_name eq '-' ) {
        $fasta_in = \*STDIN;
    }
    else {
        open( $fasta_in, '<', $file_name )
          or die "Can't open $file_name : $!";
    }

    ########## read contigs in file ##########
    my ( $contig, $sequence ) = ( '', '' );
    my ( $contigs, $xcontigs, $bases, $xbases, $max, $line_num ) = (0) x 6;
    my $min;                # stays undef until the first contig is processed
    my $seen_header = 0;    # explicit flag: a contig may be named "0"
    while ( defined( my $line = <$fasta_in> ) ) {
        chomp $line;
        $line_num++;
        if ( $line =~ /^>/ ) {
            if ($seen_header) {

                # A new header closes the previous contig.
                ( $contigs, $xcontigs, $bases, $xbases, $min, $max ) =
                  process_contig( $contig, $sequence, $contigs, $xcontigs,
                    $bases, $xbases, $min, $max );
            }
            if ( $line =~ m/^>(\S+)/ ) {
                $contig = $1;
            }
            else {
                die "$prog: Invalid fasta file header at line $line_num\n"
                  . "of file: '$file_name'\n";
            }
            $seen_header = 1;
            $sequence    = '';
            next;
        }
        if ( !$seen_header ) {
            die
"$prog: File '$file_name' does not begin with a fasta header line\n";
        }
        $line =~ s/\s//g;
        $sequence .= $line;
    }

    if ($decompressor) {

        # close() on a pipe reports a failed child, e.g. a missing or
        # corrupt archive; the statistics below may then be incomplete.
        close($fasta_in)
          or warn
          "$prog: '$decompressor' failed on '$file_name' (status $?)\n";
    }
    elsif ( $file_name ne '-' ) {
        close($fasta_in);
    }

    # The last contig has no following header to close it.
    if ($seen_header) {
        ( $contigs, $xcontigs, $bases, $xbases, $min, $max ) =
          process_contig( $contig, $sequence, $contigs, $xcontigs, $bases,
            $xbases, $min, $max );
    }

    # print stats and accumulate totals
    my $ave = $contigs ? ( sprintf "%.1f", $bases / $contigs ) : 0;
    print_counts( $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave,
        $file_name );
    $Total_Contigs  += $contigs;
    $Total_XContigs += $xcontigs;
    $Total_Bases    += $bases;
    $Total_XBases   += $xbases;

    # $min is undef for a file without contigs; such a file must not reset
    # the running minimum.
    if ( defined $min && ( !defined $Total_Min || $Total_Min > $min ) ) {
        $Total_Min = $min;
    }
    if ( !defined $Total_Max || $Total_Max < $max ) {
        $Total_Max = $max;
    }

    return;
}    # end read_file
|
||
#=== FUNCTION ================================================================
# NAME: process_contig
# VERSION: 03/11/2009 03:03:13 PM PST
# DESCRIPTION: Fold one contig into the running per-file counters.
# PARAMETERS: $contig   - contig name
#             $sequence - its sequence; only the length is used
#             $contigs, $xcontigs, $bases, $xbases, $min, $max
#                       - counters so far ($min may be undef)
# RETURNS: the updated ( $contigs, $xcontigs, $bases, $xbases, $min, $max )
# TODO: ???
#===============================================================================
sub process_contig {
    my ( $contig, $sequence, $contigs, $xcontigs, $bases, $xbases, $min, $max )
      = @_;

    my $seq_length = length $sequence;

    $contigs++;
    $bases += $seq_length;

    if ($Unique_Counts) {

        # The contig counts once, plus N more times when its name ends
        # in "__N".
        my $copies = 1;
        $copies += $1 if $contig =~ /__(\d+)$/;
        $xcontigs += $copies;
        $xbases   += $seq_length * $copies;
    }

    # Track the shortest and longest contig seen so far.
    unless ( defined $min && $min <= $seq_length ) {
        $min = $seq_length;
    }
    unless ( $max >= $seq_length ) {
        $max = $seq_length;
    }

    return ( $contigs, $xcontigs, $bases, $xbases, $min, $max );
}    # end process_contig
|
||
#=== FUNCTION ================================================================
# NAME: print_counts
# VERSION: 03/11/2009 03:03:30 PM PST
# DESCRIPTION: Print the statistics of one file to STDOUT: a single
#              column-aligned line when $Unique_Counts is set, otherwise a
#              short labelled report.
# PARAMETERS: $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave
#                        - the counters to report
#             $file_name - name shown in the report
# RETURNS: nothing
# TODO: ???
#===============================================================================
sub print_counts {
    my ( $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave, $file_name ) =
      @_;

    my ( $format, @values );

    if ($Unique_Counts) {
        $format = "%7d %7d %13d %13d %7d %7d %9.1f %s\n";
        @values = (
            $contigs, $xcontigs, $bases, $xbases,
            $min,     $max,      $ave,   $file_name
        );
    }
    else {
        # Labelled report; the x-counts and the base total are not shown.
        $format =
"\n# File: %s\nNseqs: %9d\nMin. length: %d\nMax. length: %d\nAvg. length: %d\n";
        @values = ( $file_name, $contigs, $min, $max, $ave );
    }

    printf STDOUT $format, @values;

    return;
}    # end print_counts
|
||
#=== FUNCTION ================================================================
# NAME: "MAIN"
# VERSION: 03/11/2009 03:08:14 PM PST
# DESCRIPTION: Run read_file() on every file named on the command line.
# PARAMETERS: @ARGV - fasta files to summarize
# RETURNS: exit status 0
# TODO: ???
#===============================================================================
MAIN:
# Loop on the argument count, not on the truth of the shifted value: a file
# named "0" (or an empty argument) must not end the loop early.
while (@ARGV) {
    my $infile = shift(@ARGV);
    read_file($infile);
}
exit(0);
__END__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/bin/bash

# One-time initialization of a project created from the template:
#   1. write doc/README.md and a README.md for bin, data, results and src,
#   2. replace the existing git history with a fresh repository and write a
#      .gitignore (only if git is installed),
#   3. list the project tree and move this script to old/.
#
# All paths are resolved from the location of this script, so it can be
# started from any directory.

TIMESTAMP=$(date '+%Y-%m-%d')

## Set wd
SCRIPTPATH="$( cd "$(dirname "$0")" && pwd -P )"
if [ -z "$SCRIPTPATH" ] ; then
    echo "Error: cannot resolve the location of $0" >&2
    exit 1
fi
SCRIPTNAME=$(basename "$0")
PROJPATH=$(dirname "$SCRIPTPATH")
PROJNAME=$(basename "$PROJPATH")
echo "* Project name:" "$PROJNAME"

# Every relative path below (doc/, bin/, .git, ...) is meant relative to the
# project root, so change into it first and unconditionally -- not only when
# git happens to be installed.
cd "$PROJPATH" || { echo "Error: cannot cd to $PROJPATH" >&2 ; exit 1 ; }

# Make sure every directory written to below exists.
mkdir -p bin data doc old results src

## Create doc/README.md
if true ; then
    cat << EOF > doc/README.md
# Project $PROJNAME
**Version:** $TIMESTAMP
**Sign:** $USER
## Description
Text here.
More documentation in the \`doc\` folder.
## Tools
$(find bin -type f -executable ! -iname "init.sh" -printf "* %p\n")
## Data
Data in the \`data\` folder.
## Analyses
Text here.
---
## Results
Results in \`results\` folder.
---
EOF
fi

## Reinit git, create .gitignore, and append to doc/README.md
if command -v git >/dev/null 2>&1; then
    # Drop the existing history; the new project starts its own.
    if [ -e ".git" ] ; then
        rm -rf .git
    fi
    printf '* '
    git init
    cat << EOF > .gitignore
old
EOF
    cat << EOF >> doc/README.md
Version Control
To track changes (after creating and editing files)
git add *
git commit -m "first commit"
EOF
fi

## Create other README.md files
if true ; then
    for f in bin data results src ; do
        cat << EOF > "$f"/README.md
# $PROJNAME -- $f
**Version:** $TIMESTAMP
**Sign:** $USER
## Description
Text here.
EOF
    done
fi

## List files and folders
echo "* Path, files and folders:"
if command -v tree >/dev/null 2>&1; then
    tree -I init.sh "$PROJPATH"
else
    ls -I init.sh -F "$PROJPATH"
fi
echo "* Start with file doc/README.md"

## Move init.sh to old/
# Use the resolved location: a relative "$0" is no longer valid after the
# cd above.
mv "$SCRIPTPATH/$SCRIPTNAME" "$PROJPATH/old/."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# ptemplate -- data | ||
|
||
**Version:** 2016-01-25 | ||
|
||
**Sign:** nylander | ||
|
||
## Description | ||
|
||
Text here. | ||
|
Oops, something went wrong.