Skip to content

Commit

Permalink
Merge branch 'release/0.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
Niklas Mähler committed Apr 26, 2015
2 parents 54c9166 + 78ee541 commit 84a0e2d
Show file tree
Hide file tree
Showing 38 changed files with 1,619 additions and 666 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
/.coverage
/docs/_build
/data
/bin/seqpoetc
87 changes: 61 additions & 26 deletions bin/propex → bin/seqpoet
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import itertools
import os
import sys

import propex
import seqpoet

def get_probe(fname):
with open(fname) as f:
try:
seqs = [propex.sequence.Sequence(line.strip()) for line in f \
seqs = [seqpoet.sequence.Sequence(line.strip()) for line in f \
if len(line.strip()) > 0]
except ValueError:
print('ERROR: probe file does not contain valid sequences',
Expand All @@ -30,14 +30,14 @@ def get_single_sequence(fname, genbank_only=False, stop_on_error=False):
genbank_success = False
fasta_success = False
try:
seq = propex.GenBank(fname)
seq = seqpoet.GenBank(fname)
genbank_success = True
except ValueError:
except seqpoet.genbank.ParsingError:
pass

if not genbank_success and not genbank_only:
try:
seq = propex.Fasta(fname)
seq = seqpoet.Fasta(fname)
fasta_success = True
except ValueError:
pass
Expand Down Expand Up @@ -100,10 +100,10 @@ def match_probe(probe, seqs, mismatches=2):
pl = len(probe)
for f in seqs.itervalues():
for i, record in enumerate(f):
res1 = propex.search.search(str(probe), str(record.seq),
res1 = seqpoet.search.search(str(probe), str(record.seq),
mismatches=mismatches)
res2 = [len(record.seq) - x - pl for x in \
propex.search.search(str(probe), str(record.seq.revcomp()),
seqpoet.search.search(str(probe), str(record.seq.revcomp()),
mismatches=mismatches)]

if len(res1) > 0:
Expand Down Expand Up @@ -143,16 +143,16 @@ def match_primer(primers, seqs, mismatches=2,
pl2 = len(primers[1])
for f in seqs.itervalues():
for i, record in enumerate(f):
res1_1 = propex.search.search(str(primers[0]), str(record.seq),
res1_1 = seqpoet.search.search(str(primers[0]), str(record.seq),
mismatches=mismatches)
res1_2 = [len(record.seq) - x - pl1 for x in \
propex.search.search(str(primers[0]), str(record.seq.revcomp()),
seqpoet.search.search(str(primers[0]), str(record.seq.revcomp()),
mismatches=mismatches)]

res2_1 = propex.search.search(str(primers[1]), str(record.seq),
res2_1 = seqpoet.search.search(str(primers[1]), str(record.seq),
mismatches=mismatches)
res2_2 = [len(record.seq) - x - pl2 for x in \
propex.search.search(str(primers[1]), str(record.seq.revcomp()),
seqpoet.search.search(str(primers[1]), str(record.seq.revcomp()),
mismatches=mismatches)]

# Match res1_1 with res2_2 and res2_1 with res1_2 to get primer
Expand Down Expand Up @@ -203,17 +203,29 @@ def match_primer(primers, seqs, mismatches=2,

return matches

def find_operon(matches, seqs, max_distance=500):
def find_operon(matches, seqs, max_distance=500, no_revcomp=False,
extend_downstream=0, extend_upstream=0):
match_operon = []
for m in matches:
gb = seqs[m['filename']]
locus = gb[m['seqindex']]
location = propex.genbank.Location.from_int(m['hitstart'], m['hitend'])
features = locus.features_at_location(location)
if m['strand'] == '+':
location = seqpoet.genbank.Location.from_int(
max(1, m['hitstart'] - extend_upstream),
m['hitend'] + extend_downstream)
else:
location = seqpoet.genbank.Location.from_int(
max(1, m['hitstart'] - extend_downstream),
m['hitend'] + extend_upstream)

features = filter(lambda x: x.location.is_complement == \
(m['strand'] == '-'), locus.features_at_location(location))

if len(features) == 0:
print('WARNING: no gene for match in locus {0}'.format(m['seqname']),
file=sys.stderr)
continue

operon_genes = []
for f in features:
# Find upstream genes
Expand Down Expand Up @@ -243,7 +255,9 @@ def find_operon(matches, seqs, max_distance=500):

operon_seq = locus.seq[min_start:max_end]

# Reverse complement matches on minus-strand?
# Reverse complement matches on minus-strand
if not no_revcomp and m['strand'] == '-':
operon_seq = operon_seq.revcomp()

match_operon.append({
'filename': m['filename'],
Expand All @@ -258,7 +272,7 @@ def find_operon(matches, seqs, max_distance=500):

return match_operon

def write_fasta(matches, filename=sys.stdout):
def write_fasta(matches, filename=sys.stdout, no_revcomp=False):
if isinstance(filename, file):
f = filename
close = False
Expand All @@ -267,8 +281,10 @@ def write_fasta(matches, filename=sys.stdout):
close = True

for m in matches:
if not no_revcomp and m['strand'] == '-':
m['seq'] = m['seq'].revcomp()
m['filename'] = os.path.basename(m['filename'])
s = propex.fasta.FastaRecord(m['seq'],
s = seqpoet.fasta.FastaRecord(m['seq'],
'{filename}:{seqname}:{hitstart}:{hitend}:{length}:{strand}' \
.format(**m))
print(s, file=f)
Expand All @@ -293,25 +309,37 @@ def parse_args():
parser.add_argument('-m', '--mismatches', help=('the maximum number of '
'mismatches allowed when aligning probe/primer to the genome '
'(default: %(default)d)'),
type=int, default=2, metavar='N')
type=int, default=2, metavar='int')

parser.add_argument('-d', '--max-distance', help=('the maximum intergenic '
'distance allowed when assembling operons (default: %(default)d)'),
type=int, default=500, metavar='N')
type=int, default=500, metavar='int')

parser.add_argument('--min-product', help=('minimum PCR product length '
'to consider (default: %(default)d)'), type=int, default=0,
metavar='N')
metavar='int')

parser.add_argument('--max-product', help=('maximum PCR product length '
'to consider (default: %(default)d)'), type=int, default=3000,
metavar='N')
metavar='int')

parser.add_argument('--no-revcomp', help=('don\'t reverse complement '
'results on the minus strand (default: do reverse complementation)'),
action='store_true')

parser.add_argument('--downstream', help=('extend probe/primer match '
'%(metavar)s bases downstream for operon finding (default: '
'%(default)s)'), metavar='int', default=0, type=int)

parser.add_argument('--upstream', help=('extend probe/primer match '
'%(metavar)s bases upstream for operon finding (default: '
'%(default)s)'), metavar='int', default=0, type=int)

parser.add_argument('-o', '--out', help='file for output (default: stdout)',
default=sys.stdout)
default=sys.stdout, metavar='file')

parser.add_argument('--version', help=('print version and exit'),
action='version', version='%(prog)s v{0}'.format(propex.__version__))
action='version', version='%(prog)s v{0}'.format(seqpoet.__version__))

args = parser.parse_args()

Expand All @@ -330,7 +358,8 @@ def parse_args():
if not os.path.exists(os.path.dirname(args.out)):
parser.error('file or directory not found: {}'.format(args.out))

# Mismatches, distance and max/min product length should be integers >= 0
# Mismatches, distance, max/min product length and upstream/downstream
# should be integers >= 0
if args.mismatches < 0:
parser.error('mismatches must not be negative')
if args.max_distance < 0:
Expand All @@ -339,6 +368,10 @@ def parse_args():
parser.error('minimum product length must not be negative')
if args.max_product < 0:
parser.error('maximum product length must not be negative')
if args.downstream < 0:
parser.error('downstream extension must not be negative')
if args.upstream < 0:
parser.error('upstream extension must not be negative')

return args

Expand Down Expand Up @@ -376,12 +409,14 @@ def main():

# In silico PCR results
if is_primer and args.pcr:
write_fasta(matches, filename=args.out)
write_fasta(matches, filename=args.out, no_revcomp=args.no_revcomp)
exit(0)

# Operon extraction
print('Looking for operons', file=sys.stderr)
match_features = find_operon(matches, seqs, max_distance=args.max_distance)
match_features = find_operon(matches, seqs, max_distance=args.max_distance,
no_revcomp=args.no_revcomp, extend_downstream=args.downstream,
extend_upstream=args.upstream)

if len(match_features) == 0:
print('WARNING: no operons found', file=sys.stderr)
Expand Down
8 changes: 4 additions & 4 deletions docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ qthelp:
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/propex.qhcp"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/seqpoet.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/propex.qhc"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/seqpoet.qhc"

applehelp:
$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
Expand All @@ -104,8 +104,8 @@ devhelp:
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/propex"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/propex"
@echo "# mkdir -p $$HOME/.local/share/devhelp/seqpoet"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/seqpoet"
@echo "# devhelp"

epub:
Expand Down
9 changes: 9 additions & 0 deletions docs/_static/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
3 changes: 3 additions & 0 deletions docs/_templates/layout.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{# layout.html #}
{% extends "!layout.html" %}
{% set css_files = css_files + ['_static/style.css'] %}
40 changes: 40 additions & 0 deletions docs/command_line.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
Command line arguments
======================

::

    seqpoet [options] genomedir probe

Mandatory arguments
-------------------

============= =======================================================
genomedir     directory containing the genome files to use (FASTA or
              GenBank format) or a single GenBank or FASTA file
probe         file containing either a single sequence (probe) or a
              pair of sequences (primer pair; one sequence per line)
============= =======================================================

Optional arguments
------------------

-h, --help            show this help message and exit
--pcr                 only perform in silico PCR. Requires that the probe
                      file contains a primer pair (default: perform operon
                      extraction)
-m int, --mismatches int
                      the maximum number of mismatches allowed when aligning
                      probe/primer to the genome (default: 2)
-d int, --max-distance int
                      the maximum intergenic distance allowed when
                      assembling operons (default: 500)
--min-product int     minimum PCR product length to consider (default: 0)
--max-product int     maximum PCR product length to consider (default: 3000)
--no-revcomp          don't reverse complement results on the minus strand
                      (default: do reverse complementation)
--downstream int      extend probe/primer match int bases downstream for
                      operon finding (default: 0)
--upstream int        extend probe/primer match int bases upstream for
                      operon finding (default: 0)
-o file, --out file   file for output (default: stdout)
--version             print version and exit
29 changes: 19 additions & 10 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# propex documentation build configuration file, created by
# sphinx-quickstart on Sat Mar 14 20:54:34 2015.
# seqpoet documentation build configuration file, created by
# sphinx-quickstart on Tue Apr 21 20:33:27 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
Expand Down Expand Up @@ -48,7 +48,7 @@
master_doc = 'index'

# General information about the project.
project = u'propex'
project = u'seqpoet'
copyright = u'2015, Niklas Mähler'
author = u'Niklas Mähler'

Expand All @@ -59,7 +59,7 @@
# The short X.Y version.
import pkg_resources
try:
release = pkg_resources.get_distribution('propex').version
release = pkg_resources.get_distribution('seqpoet').version
except pkg_resources.DistributionNotFound:
print 'To build the documentation, The distribution information of sandman'
print 'Has to be available. Either install the package into your'
Expand Down Expand Up @@ -119,7 +119,16 @@

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'sphinx_rtd_theme'

# on_rtd is whether we are on readthedocs.org
import os
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'

if not on_rtd: # only import and set the theme if we're building docs locally
import sphinx_rtd_theme
html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]


# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
Expand Down Expand Up @@ -211,7 +220,7 @@
#html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = 'propexdoc'
htmlhelp_basename = 'seqpoetdoc'

# -- Options for LaTeX output ---------------------------------------------

Expand All @@ -233,7 +242,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'propex.tex', u'propex Documentation',
(master_doc, 'seqpoet.tex', u'seqpoet Documentation',
u'Author', 'manual'),
]

Expand Down Expand Up @@ -263,7 +272,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'propex', u'propex Documentation',
(master_doc, 'seqpoet', u'seqpoet Documentation',
[author], 1)
]

Expand All @@ -277,8 +286,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'propex', u'propex Documentation',
author, 'propex', 'One line description of project.',
(master_doc, 'seqpoet', u'seqpoet Documentation',
author, 'seqpoet', 'One line description of project.',
'Miscellaneous'),
]

Expand Down
Loading

0 comments on commit 84a0e2d

Please sign in to comment.