Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
nylander authored and nylander committed Jan 25, 2016
0 parents commit e8f5ef0
Show file tree
Hide file tree
Showing 13 changed files with 485 additions and 0 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# ptemplate -- A project template

Inspired by [A Quick Guide to Organizing Computational Biology Projects](http://dx.doi.org/10.1371/journal.pcbi.1000424)

To initialize a new `project`, run these three steps:

git clone https://github.com/nylander/ptemplate.git
mv ptemplate project && cd project
sh bin/init.sh


10 changes: 10 additions & 0 deletions bin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# ptemplate -- bin

**Version:** 2016-01-25

**Sign:** nylander

## Description

Text here.

46 changes: 46 additions & 0 deletions bin/fasta_unwrap.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/perl
#===============================================================================
=pod
=head1
FILE: fasta_unwrap.pl
USAGE: ./fasta_unwrap.pl
DESCRIPTION: Un-wrap sequence lines in fasta
OPTIONS: ---
REQUIREMENTS: ---
BUGS: ---
NOTES: ---
AUTHOR: Johan Nylander (JN), [email protected]
COMPANY: BILS/NRM
VERSION: 1.0
CREATED: 01/12/2016 04:25:36 PM
REVISION: ---
=cut

#===============================================================================

use strict;
use warnings;

# Un-wrap FASTA input (files named on the command line, or STDIN): every
# record is written as its header line followed by the complete sequence on a
# single line.
#
# The input is scanned line by line and a record starts only at a line that
# begins with '>'. Splitting the stream on every '>' character instead would
# cut headers that contain '>' in their description (e.g. "A->B") into bogus
# records.
my $in_record = 0;    # true once the first header line has been seen

while ( my $line = <> ) {
    $line =~ s/[\r\n]+\z//;    # drop the line ending, including Windows CRLF

    if ( $line =~ /^>/ ) {
        print "\n" if $in_record;    # terminate the previous sequence line
        print $line, "\n";
        $in_record = 1;
        next;
    }

    # Text before the first header belongs to no record; skip it.
    next if !$in_record;

    print $line;    # no newline here: sequence lines are joined together
}

# Terminate the sequence line of the last record.
print "\n" if $in_record;
47 changes: 47 additions & 0 deletions bin/fasta_wrap.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/perl
#===============================================================================
=pod
=head1
FILE: fasta_wrap.pl
USAGE: ./fasta_wrap.pl
DESCRIPTION: Wrap sequence lines in fasta
OPTIONS: ---
REQUIREMENTS: ---
BUGS: ---
NOTES: ---
AUTHOR: Johan Nylander (JN), [email protected]
COMPANY: BILS/NRM
VERSION: 1.0
CREATED: 01/12/2016 04:25:36 PM
REVISION: ---
=cut

#===============================================================================

use strict;
use warnings;

# Maximum number of sequence characters per output line.
my $length = 60;

# Wrap FASTA input (files named on the command line, or STDIN).
#
# Header lines (first non-blank character is '>') are passed through
# unchanged. Every other line is broken after each run of $length
# non-whitespace characters. Each input line is wrapped on its own, so
# sequences that already span several lines should be un-wrapped first.
while ( my $line = <> ) {
    chomp $line;

    if ( $line !~ /^\s*>/ ) {

        # The negative look-ahead keeps a newline from being inserted after
        # the final chunk of the line (optionally followed by a CR). Without
        # it, a line whose length is an exact multiple of $length would be
        # followed by a spurious blank line, because the print below already
        # terminates the line.
        $line =~ s/(\S{$length})(?!\r?\z)/$1\n/g;
    }

    print $line, "\n";
}
165 changes: 165 additions & 0 deletions bin/get_fasta_info.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/perl

# Usage: get_fasta_info.pl infile.fa
#

use strict;
use Data::Dumper;

my (
$my_name, $Total_Contigs, $Total_XContigs, $Total_Bases,
$Total_XBases, $Total_Min, $Total_Max, $Unique_Counts
);

#=== FUNCTION ================================================================
# NAME: read_file
# VERSION: 02/09/2012 12:11:04 PM
# DESCRIPTION: Read one fasta file (plain, or compressed with a .gz, .zip, .Z
#              or .bz2 suffix), count its sequences and bases, print the
#              per-file statistics through print_counts() and add them to the
#              file-level $Total_* accumulators.
# PARAMETERS: $file_name - path of the fasta file; '-' reads standard input
# RETURNS: nothing. Dies if the file cannot be opened, if a header line has
#          no identifier, or if sequence data precedes the first header.
# TODO: tar archives (*.tar.gz, *.tar.bz2, *.tgz, etc) are not handled.
#===============================================================================
sub read_file {
    my ($file_name) = @_;

    # $my_name is not assigned anywhere in the visible part of this file; fall
    # back to the script name so error messages carry a usable prefix.
    my $prog = defined $my_name ? $my_name : $0;

    ########## open file ##########
    ## Compressed files are read through the matching decompressor. The
    ## list form of a piped open hands the file name to the program directly,
    ## so it is never interpreted by a shell, and $file_name stays intact for
    ## the report printed below.
    my %tool_for =
        ( gz => 'gzip', zip => 'gzip', Z => 'gzip', bz2 => 'bzip2' );
    my ( $fh, $is_pipe );
    if ( $file_name eq '-' ) {

        # Keep the historical meaning of '-' (standard input).
        $fh = \*STDIN;
    }
    elsif ( $file_name =~ /\.(gz|zip|Z|bz2)$/ ) {
        open( $fh, '-|', $tool_for{$1}, '-dc', '--', $file_name )
            or die "Can't open $file_name : $!";
        $is_pipe = 1;
    }
    else {
        open( $fh, '<', $file_name ) or die "Can't open $file_name : $!";
    }

    ########## read contigs in file ##########
    # $contig stays undefined until the first header line has been seen.
    # Testing definedness rather than truth keeps a contig named "0" from
    # being mistaken for "no header yet".
    my ( $contig, $sequence );
    my ( $contigs, $xcontigs, $bases, $xbases, $max, $line_num ) = (0) x 6;
    my $min;
    while ( defined( my $line = <$fh> ) ) {
        chomp $line;
        $line_num++;
        if ( $line =~ /^>/ ) {

            # A new header closes the previous contig, if there was one.
            if ( defined $contig ) {
                ( $contigs, $xcontigs, $bases, $xbases, $min, $max ) =
                    process_contig( $contig, $sequence, $contigs, $xcontigs,
                    $bases, $xbases, $min, $max );
            }
            if ( $line =~ m/^>(\S+)/ ) {
                $contig = $1;
            }
            else {
                die "$prog: Invalid fasta file header at line $line_num\n"
                    . "of file: '$file_name'\n";
            }
            $sequence = '';
            next;
        }
        if ( !defined $contig ) {
            die
                "$prog: File '$file_name' does not begin with a fasta header line\n";
        }
        $line =~ s/\s//g;
        $sequence .= $line;
    }

    if ($is_pipe) {

        # close() on a pipe reports a failed decompressor (e.g. corrupt file).
        close($fh)
            or warn "$prog: decompressing '$file_name' failed"
            . ( $! ? ": $!" : ' (exit status ' . ( $? >> 8 ) . ')' ) . "\n";
    }
    elsif ( $file_name ne '-' ) {
        close($fh);
    }

    # The last contig has no following header to close it.
    if ( defined $contig ) {
        ( $contigs, $xcontigs, $bases, $xbases, $min, $max ) =
            process_contig( $contig, $sequence, $contigs, $xcontigs, $bases,
            $xbases, $min, $max );
    }

    # print stats and accumulate totals
    my $ave = $contigs ? ( sprintf "%.1f", $bases / $contigs ) : 0;
    print_counts( $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave,
        $file_name );
    $Total_Contigs  += $contigs;
    $Total_XContigs += $xcontigs;
    $Total_Bases    += $bases;
    $Total_XBases   += $xbases;

    # A file without any contig has no minimum and must not reset the total.
    $Total_Min = $min
        if defined $min && ( !defined $Total_Min || $Total_Min > $min );
    $Total_Max = $max if !defined $Total_Max || $Total_Max < $max;

    return;
}    # end read_file

#=== FUNCTION ================================================================
# NAME: process_contig
# VERSION: 03/11/2009 03:03:13 PM PST
# DESCRIPTION: Fold one contig into the running per-file counters.
# PARAMETERS: $contig   - contig identifier
#             $sequence - sequence string of the contig
#             $contigs, $xcontigs, $bases, $xbases, $min, $max
#                       - running counters; $min may be undef before the
#                         first contig
# RETURNS: the updated list ( $contigs, $xcontigs, $bases, $xbases, $min,
#          $max )
# TODO: ---
#===============================================================================
sub process_contig {
    my ( $contig, $sequence, $contigs, $xcontigs, $bases, $xbases, $min, $max )
        = @_;

    my $seq_len = length $sequence;

    $contigs += 1;
    $bases   += $seq_len;

    # The x-counters are only maintained when the file-level $Unique_Counts
    # flag is set.
    if ($Unique_Counts) {

        # The contig counts once; a trailing "__<N>" in its name adds N more.
        my $copies = 1;
        $copies += $1 if $contig =~ /__(\d+)$/;
        $xcontigs += $copies;
        $xbases   += $seq_len * $copies;
    }

    # Track the shortest and longest contig seen so far.
    if ( !defined $min || $seq_len < $min ) {
        $min = $seq_len;
    }
    if ( $seq_len > $max ) {
        $max = $seq_len;
    }

    return ( $contigs, $xcontigs, $bases, $xbases, $min, $max );
}    # end process_contig

#=== FUNCTION ================================================================
# NAME: print_counts
# VERSION: 03/11/2009 03:03:30 PM PST
# DESCRIPTION: Print the statistics of one file to STDOUT. The layout depends
#              on the file-level $Unique_Counts flag: a single line of
#              columns when it is set, a labelled multi-line report otherwise.
# PARAMETERS: $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave
#                        - counters to report
#             $file_name - name shown in the report
# RETURNS: nothing
# TODO: ---
#===============================================================================
sub print_counts {
    my ( $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave, $file_name ) =
        @_;

    # Default layout: labelled report, one value per line.
    if ( !$Unique_Counts ) {
        my @report_values = ( $file_name, $contigs, $min, $max, $ave );
        printf STDOUT
            "\n# File: %s\nNseqs: %9d\nMin. length: %d\nMax. length: %d\nAvg. length: %d\n",
            @report_values;
        return;
    }

    # Column layout, including the x-counters.
    my @columns = (
        $contigs, $xcontigs, $bases, $xbases, $min, $max, $ave, $file_name
    );
    printf STDOUT "%7d %7d %13d %13d %7d %7d %9.1f %s\n", @columns;

    return;
}    # end print_counts

#=== FUNCTION ================================================================
# NAME: "MAIN"
# VERSION: 03/11/2009 03:08:14 PM PST
# DESCRIPTION: Run read_file() on every file named on the command line, in
#              order, then exit with status 0.
# PARAMETERS: @ARGV - fasta file names
# RETURNS: ---
# TODO: ---
#===============================================================================
MAIN:

# Test definedness, not truth: a plain truth test on the shifted value would
# end the loop at an argument such as "0" and silently skip every file named
# after it.
while ( defined( my $infile = shift(@ARGV) ) ) {
    read_file($infile);
}
exit(0);
__END__
99 changes: 99 additions & 0 deletions bin/init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash

# Initialize a new project from the template:
#   1. write doc/README.md,
#   2. re-initialize git (when available) and create .gitignore,
#   3. write a README.md in bin, data, results and src,
#   4. list the project content,
#   5. move this script into old/.
#
# All relative paths below are resolved against the project root (the parent
# of the folder holding this script), so the script can be started from any
# working directory.

TIMESTAMP=$(date '+%Y-%m-%d')

## Set wd
SCRIPTPATH="$( cd "$(dirname "$0")" && pwd -P )"
SCRIPTNAME="$(basename "$0")"
PROJPATH="$(dirname "$SCRIPTPATH")"
PROJNAME="$(basename "$PROJPATH")"
echo "* Project name:" "$PROJNAME"

# Work from the project root from here on; relative paths such as
# doc/README.md would otherwise depend on where the script was started.
cd "$PROJPATH" || { echo "Error: cannot cd to $PROJPATH" >&2 ; exit 1 ; }

# Make sure every folder written to below exists (no-op when already there).
mkdir -p doc old bin data results src || exit 1

## Create doc/README.md
if true ; then
cat << EOF > doc/README.md
# Project $PROJNAME
**Version:** $TIMESTAMP
**Sign:** $USER
## Description
Text here.
More documentation in the \`doc\` folder.
## Tools
$(find bin -type f -executable ! -iname "init.sh" -printf "* %p\n")
## Data
Data in the \`data\` folder.
## Analyses
Text here.
---
## Results
Results in \`results\` folder.
---
EOF
fi

## Reinit git, create .gitignore, and append to doc/README.md
if command -v git >/dev/null 2>&1; then
    # Drop the history inherited from the template repository.
    if [ -e ".git" ] ; then
        rm -rf .git
    fi
    echo -n "* "
    git init
cat << EOF > .gitignore
old
EOF
cat << EOF >> doc/README.md
Version Control
To track changes (after creating and editing files)
git add *
git commit -m "first commit"
EOF
fi

## Create other README.md files
if true ; then
for f in bin data results src ; do
cat << EOF > "$f"/README.md
# $PROJNAME -- $f
**Version:** $TIMESTAMP
**Sign:** $USER
## Description
Text here.
EOF
done
fi

## List files and folders
echo "* Path, files and folders:"
if command -v tree >/dev/null 2>&1; then
    tree -I init.sh "$PROJPATH"
else
    ls -I init.sh -F "$PROJPATH"
fi
echo "* Start with file doc/README.md"

## Move init.sh to old/
# Use the absolute script location: "$0" may be a path relative to the
# directory the script was started from, which is no longer the cwd.
mv "$SCRIPTPATH/$SCRIPTNAME" "$PROJPATH/old/"
10 changes: 10 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# ptemplate -- data

**Version:** 2016-01-25

**Sign:** nylander

## Description

Text here.

Loading

0 comments on commit e8f5ef0

Please sign in to comment.