diff --git a/NEWS.md b/NEWS.md index 749025d6..e3ded430 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,13 @@ +v0.18 (2016-01-04) +------------------ + +- Several commands now support parallelization with the `--parallel` option + (`fva`, `fluxcheck`, `fluxcoupling`, `robustness`). +- A more robust reaction parser is now used to parse reaction equations in + YAML files. This also means that quoting compound names with pipes (`|`) is + now optional. + v0.17 (2015-12-07) ------------------ diff --git a/docs/api/datasource_reaction.rst b/docs/api/datasource_reaction.rst new file mode 100644 index 00000000..bb9f0834 --- /dev/null +++ b/docs/api/datasource_reaction.rst @@ -0,0 +1,6 @@ + +``psamm.datasource.reaction`` -- Parser for reactions +=================================================================== + +.. automodule:: psamm.datasource.reaction + :members: diff --git a/psamm/command.py b/psamm/command.py index 2c8d659e..310c42b1 100644 --- a/psamm/command.py +++ b/psamm/command.py @@ -25,13 +25,17 @@ The :func:`.main` function is the entry point of command line interface. """ +from __future__ import division + import os import argparse import logging import abc +from itertools import islice +import multiprocessing as mp import pkg_resources -from six import add_metaclass, iteritems +from six import add_metaclass, iteritems, itervalues from . import __version__ as package_version from .datasource.native import NativeModel @@ -159,6 +163,39 @@ def _get_solver(self, **kwargs): return generic.Solver(**solver_args) +class ParallelTaskMixin(object): + """Mixin for commands that run parallel computation tasks.""" + + @classmethod + def init_parser(cls, parser): + parser.add_argument( + '--parallel', help='Set number of parallel processes (0=auto)', + type=int, default=0) + super(ParallelTaskMixin, cls).init_parser(parser) + + def _create_executor(self, handler, args, cpus_per_worker=1): + """Return a new :class:`.Executor` instance.""" + if self._args.parallel > 0: + workers = self._args.parallel + else: + try: + workers = mp.cpu_count() // cpus_per_worker + except NotImplementedError: + workers = 1 + + if workers != 1: + logger.info('Using {} parallel worker processes...'.format( + workers)) + executor = ProcessPoolExecutor( + processes=workers, handler_init=handler, handler_args=args) + else: + logger.info('Using single worker...') + executor = SequentialExecutor( + handler_init=handler, handler_args=args) + + return executor + + class FilePrefixAppendAction(argparse.Action): """Action that appends one argument or multiple from a file. @@ -194,6 +231,130 @@ def __call__(self, parser, namespace, values, option_string=None): arguments.append(values) +class _ErrorMarker(object): + """Signals error in the child process.""" + + +class ExecutorError(Exception): + """Error running tasks on executor.""" + + +class _ExecutorProcess(mp.Process): + def __init__(self, task_queue, result_queue, handler_init, + handler_args=()): + super(_ExecutorProcess, self).__init__() + self._task_queue = task_queue + self._result_queue = result_queue + self._handler_init = handler_init + self._handler_args = handler_args + + def run(self): + try: + handler = self._handler_init(*self._handler_args) + for tasks in iter(self._task_queue.get, None): + results = {task: handler.handle_task(*task) for task in tasks} + self._result_queue.put(results) + except BaseException: + self._result_queue.put(_ErrorMarker()) + raise + + +class Executor(object): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + return False + + def close(self): + pass + + +class ProcessPoolExecutor(Executor): + def __init__(self, handler_init, handler_args=(), processes=None): + if processes is None: + try: + processes = mp.cpu_count() + except NotImplementedError: + processes = 1 + + self._process_count = processes + self._processes = [] + self._task_queue = mp.Queue() + self._result_queue = mp.Queue() + + for _ in range(self._process_count): + p = _ExecutorProcess( + self._task_queue, self._result_queue, handler_init, + handler_args) + p.start() + self._processes.append(p) + + def apply(self, task): + self._task_queue.put([task]) + results = self._result_queue.get() + if isinstance(results, _ErrorMarker): + raise ExecutorError('Child process failed') + + return next(itervalues(results)) + + def imap_unordered(self, iterable, chunksize=1): + def iter_chunks(): + while True: + chunk = list(islice(iterable, chunksize)) + if len(chunk) == 0: + break + yield chunk + + it = iter_chunks() + workers = 0 + for i in range(self._process_count): + tasks = next(it, None) + if tasks is None: + break + + self._task_queue.put(tasks) + workers += 1 + + while workers > 0: + results = self._result_queue.get() + if isinstance(results, _ErrorMarker): + raise ExecutorError('Child process failed') + + tasks = next(it, None) + if tasks is None: + workers -= 1 + + self._task_queue.put(tasks) + + for task, result in iteritems(results): + yield task, result + + def close(self): + for i in range(self._process_count): + self._task_queue.put(None) + + def join(self): + for p in self._processes: + p.join() + + +class SequentialExecutor(Executor): + def __init__(self, handler_init, handler_args=()): + self._handler = handler_init(*handler_args) + + def apply(self, task): + return self._handler.handle_task(*task) + + def imap_unordered(self, iterable, chunksize): + for task in iterable: + yield task, self._handler.handle_task(*task) + + def join(self): + pass + + def main(command_class=None, args=None): """Run the command line interface with the given :class:`Command`. diff --git a/psamm/commands/fluxcheck.py b/psamm/commands/fluxcheck.py index b66b142c..31f7812b 100644 --- a/psamm/commands/fluxcheck.py +++ b/psamm/commands/fluxcheck.py @@ -17,15 +17,19 @@ from __future__ import unicode_literals +import time import logging +from itertools import product -from ..command import Command, MetabolicMixin, SolverCommandMixin, CommandError +from ..command import (Command, MetabolicMixin, SolverCommandMixin, + ParallelTaskMixin, CommandError) from .. import fluxanalysis, fastcore logger = logging.getLogger(__name__) -class FluxConsistencyCommand(MetabolicMixin, SolverCommandMixin, Command): +class FluxConsistencyCommand(MetabolicMixin, SolverCommandMixin, + ParallelTaskMixin, Command): """Check that reactions are flux consistent in a model. A reaction is flux consistent if there exists any steady-state flux @@ -82,6 +86,8 @@ def run(self): 'Using Fastcore with thermodynamic constraints' ' is not supported!') + start_time = time.time() + if enable_fastcore: solver = self._get_solver() inconsistent = set(fastcore.fastcc( @@ -100,12 +106,11 @@ def run(self): tfba=enable_tfba, solver=solver)) else: logger.info('Using flux bounds to determine consistency.') - inconsistent = set() - for reaction_id, (lo, hi) in fluxanalysis.flux_variability( - self._mm, sorted(self._mm.reactions), {}, - tfba=enable_tfba, solver=solver): - if abs(lo) < epsilon and abs(hi) < epsilon: - inconsistent.add(reaction_id) + inconsistent = set(self._run_fva_fluxcheck( + self._mm, solver, enable_tfba, epsilon)) + + logger.info('Solving took {:.2f} seconds'.format( + time.time() - start_time)) # Count the number of reactions that are fixed at zero. While these # reactions are still inconsistent, they are inconsistent because they @@ -144,3 +149,39 @@ def run(self): logger.info('Model has {}/{} inconsistent exchange reactions' ' ({} disabled by user)'.format( count_exchange, total_exchange, disabled_exchange)) + + def _run_fva_fluxcheck(self, model, solver, enable_tfba, epsilon): + handler_args = model, solver, enable_tfba + executor = self._create_executor( + FluxCheckFVATaskHandler, handler_args, cpus_per_worker=2) + + results = {} + with executor: + for (reaction_id, direction), value in executor.imap_unordered( + product(model.reactions, (1, -1)), 16): + + if reaction_id not in results: + results[reaction_id] = value + continue + + other_value = results[reaction_id] + if direction == -1: + bounds = value, other_value + else: + bounds = other_value, value + + lower, upper = bounds + if abs(lower) < epsilon and abs(upper) < epsilon: + yield reaction_id + + executor.join() + + +class FluxCheckFVATaskHandler(object): + def __init__(self, model, solver, enable_tfba): + self._problem = fluxanalysis.FluxBalanceProblem(model, solver) + if enable_tfba: + self._problem.add_thermodynamic() + + def handle_task(self, reaction_id, direction): + return self._problem.flux_bound(reaction_id, direction) diff --git a/psamm/commands/fluxcoupling.py b/psamm/commands/fluxcoupling.py index d34b173e..bf0ed748 100644 --- a/psamm/commands/fluxcoupling.py +++ b/psamm/commands/fluxcoupling.py @@ -19,13 +19,15 @@ import logging -from ..command import Command, SolverCommandMixin, MetabolicMixin, CommandError +from ..command import (Command, SolverCommandMixin, MetabolicMixin, + ParallelTaskMixin, CommandError) from .. import fluxanalysis, fluxcoupling logger = logging.getLogger(__name__) -class FluxCouplingCommand(MetabolicMixin, SolverCommandMixin, Command): +class FluxCouplingCommand(MetabolicMixin, SolverCommandMixin, + ParallelTaskMixin, Command): """Find flux coupled reactions in the model. This identifies any reaction pairs where the flux of one reaction @@ -47,33 +49,43 @@ def run(self): self._mm, max_reaction, tfba=False, solver=solver)) optimum = fba_fluxes[max_reaction] - self._fcp = fluxcoupling.FluxCouplingProblem( - self._mm, {max_reaction: 0.999 * optimum}, solver) + handler_args = self._mm, {max_reaction: 0.999 * optimum}, solver + executor = self._create_executor( + FluxCouplingTaskHandler, handler_args, cpus_per_worker=2) self._coupled = {} self._groups = [] - reactions = sorted(self._mm.reactions) - for i, reaction1 in enumerate(reactions): - if reaction1 in self._coupled: - continue - - for reaction2 in reactions[i+1:]: - if (reaction2 in self._coupled and - (self._coupled[reaction2] == - self._coupled.get(reaction1))): + def iter_reaction_pairs(): + reactions = sorted(self._mm.reactions) + for i, reaction1 in enumerate(reactions): + if reaction1 in self._coupled: continue - self._check_reactions(reaction1, reaction2) + for reaction2 in reactions[i+1:]: + if (reaction2 in self._coupled and + (self._coupled[reaction2] == + self._coupled.get(reaction1))): + continue + + yield reaction1, reaction2 + + with executor: + for task, bounds in executor.imap_unordered( + iter_reaction_pairs(), 16): + reaction1, reaction2 = task + self._check_reactions(reaction1, reaction2, bounds) + + executor.join() logger.info('Coupled groups:') for i, group in enumerate(self._groups): if group is not None: logger.info('{}: {}'.format(i, ', '.join(sorted(group)))) - def _check_reactions(self, reaction1, reaction2): - logger.debug('Solving for {}, {}'.format(reaction1, reaction2)) - lower, upper = self._fcp.solve(reaction1, reaction2) + def _check_reactions(self, reaction1, reaction2, bounds): + logger.debug('Solved for {}, {}'.format(reaction1, reaction2)) + lower, upper = bounds logger.debug('Result: {}, {}'.format(lower, upper)) @@ -135,3 +147,12 @@ def _couple_reactions(self, reaction1, reaction2): self._coupled[reaction1] = group_index self._coupled[reaction2] = group_index self._groups.append(group) + + +class FluxCouplingTaskHandler(object): + def __init__(self, model, thresholds, solver): + self._problem = fluxcoupling.FluxCouplingProblem( + model, thresholds, solver) + + def handle_task(self, reaction_1, reaction_2): + return self._problem.solve(reaction_1, reaction_2) diff --git a/psamm/commands/fva.py b/psamm/commands/fva.py index 603839de..e2d6007e 100644 --- a/psamm/commands/fva.py +++ b/psamm/commands/fva.py @@ -19,15 +19,18 @@ import time import logging +from itertools import product -from ..command import Command, SolverCommandMixin, MetabolicMixin, CommandError +from ..command import (Command, SolverCommandMixin, MetabolicMixin, + ParallelTaskMixin, CommandError) from ..util import MaybeRelative from .. import fluxanalysis logger = logging.getLogger(__name__) -class FluxVariabilityCommand(MetabolicMixin, SolverCommandMixin, Command): +class FluxVariabilityCommand(MetabolicMixin, SolverCommandMixin, + ParallelTaskMixin, Command): """Run flux variablity analysis on the model.""" @classmethod @@ -80,14 +83,47 @@ def run(self): logger.info('Setting objective threshold to {}'.format( threshold)) - flux_bounds = fluxanalysis.flux_variability( - self._mm, sorted(self._mm.reactions), {reaction: float(threshold)}, - tfba=enable_tfba, solver=solver) - for reaction_id, bounds in flux_bounds: + handler_args = ( + self._mm, solver, enable_tfba, float(threshold), reaction) + executor = self._create_executor( + FVATaskHandler, handler_args, cpus_per_worker=2) + + def iter_results(): + results = {} + with executor: + for (reaction_id, direction), value in executor.imap_unordered( + product(self._mm.reactions, (1, -1)), 16): + if reaction_id not in results: + results[reaction_id] = value + continue + + other_value = results[reaction_id] + if direction == -1: + bounds = value, other_value + else: + bounds = other_value, value + + yield reaction_id, bounds + + executor.join() + + for reaction_id, (lower, upper) in iter_results(): rx = self._mm.get_reaction(reaction_id) rxt = rx.translated_compounds(lambda x: compound_name.get(x, x)) - print('{}\t{}\t{}\t{}'.format( - reaction_id, bounds[0], bounds[1], rxt)) + print('{}\t{}\t{}\t{}'.format(reaction_id, lower, upper, rxt)) logger.info('Solving took {:.2f} seconds'.format( time.time() - start_time)) + + +class FVATaskHandler(object): + def __init__(self, model, solver, enable_tfba, threshold, reaction): + self._problem = fluxanalysis.FluxBalanceProblem(model, solver) + if enable_tfba: + self._problem.add_thermodynamic() + + self._problem.prob.add_linear_constraints( + self._problem.get_flux_var(reaction) >= threshold) + + def handle_task(self, reaction_id, direction): + return self._problem.flux_bound(reaction_id, direction) diff --git a/psamm/commands/robustness.py b/psamm/commands/robustness.py index 8a48355e..6c36bc29 100644 --- a/psamm/commands/robustness.py +++ b/psamm/commands/robustness.py @@ -20,7 +20,8 @@ import time import logging -from ..command import Command, MetabolicMixin, SolverCommandMixin, CommandError +from ..command import (Command, MetabolicMixin, SolverCommandMixin, + ParallelTaskMixin, CommandError) from .. import fluxanalysis from six.moves import range @@ -28,7 +29,8 @@ logger = logging.getLogger(__name__) -class RobustnessCommand(MetabolicMixin, SolverCommandMixin, Command): +class RobustnessCommand(MetabolicMixin, SolverCommandMixin, + ParallelTaskMixin, Command): """Run robustness analysis on the model. Given a reaction to maximize and a reaction to vary, @@ -97,22 +99,20 @@ def run(self): else: solver = self._get_solver() - p = fluxanalysis.FluxBalanceProblem(self._mm, solver) - if loop_removal == 'none': logger.info('Loop removal disabled; spurious loops are allowed') - run_fba = p.maximize elif loop_removal == 'l1min': logger.info('Loop removal using L1 minimization') - run_fba = p.max_min_l1 elif loop_removal == 'tfba': logger.info('Loop removal using thermodynamic constraints') - p.add_thermodynamic() - run_fba = p.maximize else: raise CommandError('Invalid loop constraint mode: {}'.format( loop_removal)) + p = fluxanalysis.FluxBalanceProblem(self._mm, solver) + if loop_removal == 'tfba': + p.add_thermodynamic() + # Determine minimum and maximum flux for varying reaction if self._args.maximum is None: p.maximize(varying_reaction) @@ -135,27 +135,65 @@ def run(self): start_time = time.time() - # Run FBA on model at different fixed flux values - for i in range(steps): - fixed_flux = flux_min + i*(flux_max - flux_min)/float(steps-1) - flux_var = p.get_flux_var(varying_reaction) - c, = p.prob.add_linear_constraints(flux_var == fixed_flux) + handler_args = ( + self._mm, solver, loop_removal, self._args.all_reaction_fluxes) + executor = self._create_executor( + RobustnessTaskHandler, handler_args, cpus_per_worker=2) - try: - run_fba(reaction) + def iter_tasks(): + for i in range(steps): + fixed_flux = flux_min + i*(flux_max - flux_min)/float(steps-1) + constraint = varying_reaction, fixed_flux + yield constraint, reaction - if not self._args.all_reaction_fluxes: - print('{}\t{}'.format(fixed_flux, p.get_flux(reaction))) - else: + # Run FBA on model at different fixed flux values + with executor: + for task, result in executor.imap_unordered(iter_tasks(), 16): + (varying_reaction, fixed_flux), _ = task + if result is None: + logger.warning('No solution found for {} at {}'.format( + varying_reaction, fixed_flux)) + elif self._args.all_reaction_fluxes: for other_reaction in self._mm.reactions: print('{}\t{}\t{}'.format( other_reaction, fixed_flux, - p.get_flux(other_reaction))) - except fluxanalysis.FluxBalanceError: - logger.warning('No solution found for {} at {}'.format( - varying_reaction, fixed_flux)) - finally: - c.delete() + result[other_reaction])) + else: + print('{}\t{}'.format(fixed_flux, result)) + + executor.join() logger.info('Solving took {:.2f} seconds'.format( time.time() - start_time)) + + +class RobustnessTaskHandler(object): + def __init__(self, model, solver, loop_removal, all_reactions): + self._problem = fluxanalysis.FluxBalanceProblem(model, solver) + + if loop_removal == 'none': + self._run_fba = self._problem.maximize + elif loop_removal == 'l1min': + self._run_fba = self._problem.max_min_l1 + elif loop_removal == 'tfba': + self._problem.add_thermodynamic() + self._run_fba = self._problem.maximize + + self._reactions = list(model.reactions) if all_reactions else None + + def handle_task(self, constraint, reaction): + varying_reaction, fixed_flux = constraint + flux_var = self._problem.get_flux_var(varying_reaction) + c, = self._problem.prob.add_linear_constraints(flux_var == fixed_flux) + + try: + self._run_fba(reaction) + if self._reactions is not None: + return {reaction: self._problem.get_flux(reaction) + for reaction in self._reactions} + else: + return self._problem.get_flux(reaction) + except fluxanalysis.FluxBalanceError: + return None + finally: + c.delete() diff --git a/psamm/commands/search.py b/psamm/commands/search.py index 11e23a17..c7409857 100644 --- a/psamm/commands/search.py +++ b/psamm/commands/search.py @@ -20,15 +20,7 @@ import re from ..command import Command, FilePrefixAppendAction -from ..reaction import Compound - - -def parse_compound(s): - """Parse a compound specification with optional compartment""" - m = re.match(r'([^\[]+)\[(\w+)\]', s) - if m is not None: - return Compound(m.group(1), compartment=m.group(2)) - return Compound(s) +from ..datasource.reaction import parse_compound def filter_search_term(s): diff --git a/psamm/datasource/misc.py b/psamm/datasource/misc.py index d614b878..d6777687 100644 --- a/psamm/datasource/misc.py +++ b/psamm/datasource/misc.py @@ -17,103 +17,35 @@ """Miscellaneaus data source functions.""" -import re -from decimal import Decimal - -from psamm.reaction import Reaction, Compound +from .reaction import ReactionParser +from ..reaction import Reaction def parse_sudensimple_reaction(s, arrow_rev='<=>', arrow_irrev='->'): - """Parse a reaction string (SudenSimple)""" - - # Compile pattern for matching reaction arrows - arrow_pattern = re.compile( - '(' + ('|'.join(re.escape(x) for x in (arrow_irrev, arrow_rev))) + ')') - - def parse_compound_list(s): - if s.strip() == '': - return - - for cpd in s.strip().split('+'): - cpd_split = cpd.strip().split(' ') - if len(cpd_split) == 1: - count = 1 - spec = cpd_split[0].strip() - else: - count = Decimal(cpd_split[0]) - if count % 1 == 0: - count = int(count) - spec = cpd_split[1].strip() - - m = re.match(r'^(.+)\[(.+)\]$', spec) - if m: - # compartment - cpdid = m.group(1) - comp = m.group(2) - else: - cpdid = spec - comp = None + """Parse a reaction string (SudenSimple). - yield Compound(cpdid, compartment=comp), count + .. deprecated:: 0.18 + Use :func:`~psamm.datasource.reaction.parse_reaction` instead. + """ - # Split by equation arrow - direction = Reaction.Right - left, arrow, right = arrow_pattern.split(s, maxsplit=1) - direction = Reaction.Right if arrow == arrow_irrev else Reaction.Bidir + arrows = ( + (arrow_rev, Reaction.Bidir), + (arrow_irrev, Reaction.Right) + ) - return Reaction(direction, parse_compound_list(left), - parse_compound_list(right)) + return ReactionParser(arrows=arrows).parse(s) def parse_metnet_reaction(s, arrow_rev='<==>', arrow_irrev='-->'): - """Parser for the reaction format in MetNet model""" - - # Compile pattern for matching reaction arrows - arrow_pattern = re.compile( - '(' + ('|'.join(re.escape(x) for x in (arrow_irrev, arrow_rev))) + ')') - - def parse_compound_list(s, global_comp): - if s.strip() == '': - return - - for cpd in s.strip().split('+'): - cpd_split = cpd.strip().split(') ') - if len(cpd_split) == 1: - count = 1 - spec = cpd_split[0].strip() - else: - count = Decimal(cpd_split[0].lstrip('(')) - if count % 1 == 0: - count = int(count) - spec = cpd_split[1].strip() - - if global_comp is None: - m = re.match(r'^(.+)\[(.+)\]$', spec) - if m: - # compartment - cpdid = m.group(1) - comp = m.group(2) - else: - cpdid = spec - comp = None - else: - cpdid = spec - comp = global_comp - - yield Compound(cpdid, compartment=comp), count + """Parser for the reaction format in MetNet model. - # Split by colon for compartment information - m = re.match(r'^\s*\[(\w+)\]\s*:\s*(.*)', s) - if m is not None: - global_comp = m.group(1) - s = m.group(2) - else: - global_comp = None + .. deprecated:: 0.18 + Use :func:`~psamm.datasource.reaction.parse_reaction` instead. + """ - # Split by equation arrow - direction = Reaction.Right - left, arrow, right = arrow_pattern.split(s, maxsplit=1) - direction = Reaction.Right if arrow == arrow_irrev else Reaction.Bidir + arrows = ( + (arrow_irrev, Reaction.Right), + (arrow_rev, Reaction.Bidir) + ) - return Reaction(direction, parse_compound_list(left, global_comp), - parse_compound_list(right, global_comp)) + return ReactionParser(arrows=arrows, parse_global=True).parse(s) diff --git a/psamm/datasource/modelseed.py b/psamm/datasource/modelseed.py index c8937124..c0acc78b 100644 --- a/psamm/datasource/modelseed.py +++ b/psamm/datasource/modelseed.py @@ -19,10 +19,9 @@ import csv import re -from decimal import Decimal -from ..reaction import Reaction, Compound from .context import FileMark +from .reaction import parse_reaction as parse_universal_reaction class ParseError(Exception): @@ -113,167 +112,9 @@ def parse_compound_file(f, context=None): def parse_reaction(s): - """Parse a ModelSEED reaction + """Parse a ModelSEED reaction. - This parser is based on the grammer.:: - - ::= ' ' ' ' - ::= '<=' | '<=>' | '=>' | '?' | '' - ::= '' | | ' + ' - ::= ' ' | - ::= '(' ')' | - ::= - ::= '|' '|' | 'cpd' | 'cdp' - ::= '[' ']' | - ::= - ::= - ::= - """ - - def tokenize(s): - """Return tokens of reaction string""" - s = s.lstrip() - token = '' - barquote = False - for c in s: - if c.isspace() and not barquote: - yield token - token = '' - continue - - token += c - - if c == '|': - barquote = not barquote - - if token != '': - yield token - - def parse_compound_number(number): - """Parse compound number - - Return plain int if possible, otherwise use Decimal.""" - - d = Decimal(number) - if d % 1 == 0: - return int(d) - return d - - def parse_compound_count(count): - """Parse compound count""" - - m = re.match(r'^\((.+)\)|(.+)$', count) - if not m: - raise ParseError( - 'Unable to parse compound count: {}'.format(count)) - - number = m.group(1) if m.group(1) is not None else m.group(2) - return parse_compound_number(number) - - def parse_compound_name(name): - """Parse compound name""" - - m = re.match(r'^\|(.+)\||(cdp\d+.*)|(cpd\d+.*)$', name) - if not m: - raise ParseError('Unable to parse compound name: {}'.format(name)) - - # Return first matching group - for g in m.groups(): - if g is not None: - return g - - return None - - def parse_compound(cmpd): - """Parse compound""" - - count = 1 - if len(cmpd) == 2: - count = parse_compound_count(cmpd[0]) - name = parse_compound_name(cmpd[1]) - elif len(cmpd) == 1: - name = parse_compound_name(cmpd[0]) - else: - raise ParseError( - 'Unexpected number of tokens in compound: {}'.format(cmpd)) - - compartment = None - m = re.match(r'^(.+?)\[(.+)\]$', name) - if m is not None: - name = m.group(1) - compartment = m.group(2) - - return Compound(name, compartment=compartment), count - - def parse_compound_list(cmpds): - """Parse a list of compounds""" - - if len(cmpds) == 0: - return - - cmpd = [] - for t in cmpds: - if t == '+': - yield parse_compound(cmpd) - cmpd = [] - else: - cmpd.append(t) - - if len(cmpd) == 0: - raise ParseError('Expected compound in compound list') - - yield parse_compound(cmpd) - - tokens = list(tokenize(s)) - direction = None - for i, t in enumerate(tokens): - if t in ('<=', '<=>', '=>', '?', ''): - direction = t - left = tokens[:i] - right = tokens[i+1:] - - if direction is None: - raise ParseError('Failed to parse reaction: {}'.format(tokens)) - - if direction in ('', '?'): - direction = Reaction.Bidir - - return Reaction(direction, parse_compound_list(left), - parse_compound_list(right)) - - -def format_reaction(reaction): - """Format reaction as string - - converting parsed reactions - back to string represetation using this module only a subset of the grammar - is used.:: - - ::= ' ' ' ' - ::= '<=>' | '=>' | '?' - ::= '' | | ' + ' - ::= ' ' | - ::= '(' ')' - ::= '|' '|' | '|' - '[' ']' '|' - ::= - ::= + .. deprecated:: 0.18 + Use :func:`~psamm.datasource.reaction.parse_reaction` instead. """ - - # Define helper functions - def format_compound(compound, count): - """Format compound""" - cpdspec = str(compound) - if count != 1: - return '({}) |{}|'.format(count, cpdspec) - return '|{}|'.format(cpdspec) - - def format_compound_list(cmpds): - """Format compound list""" - return ' + '.join(format_compound(compound, count) - for compound, count in cmpds) - - return '{} {} {}'.format( - format_compound_list(reaction.left), - '?' if reaction.direction == '' else reaction.direction, - format_compound_list(reaction.right)) + return parse_universal_reaction(s) diff --git a/psamm/datasource/native.py b/psamm/datasource/native.py index d930cd4c..fd72840f 100644 --- a/psamm/datasource/native.py +++ b/psamm/datasource/native.py @@ -35,9 +35,9 @@ from ..reaction import Reaction, Compound from .context import FilePathContext, FileMark +from .reaction import ReactionParser from . import modelseed - # Module-level logging logger = logging.getLogger(__name__) @@ -46,6 +46,8 @@ _HAS_YAML_LIBRARY = None +_REACTION_PARSER = ReactionParser() + class ParseError(Exception): """Exception used to signal errors while parsing""" @@ -382,7 +384,7 @@ def parse_compound_list(l): yield compound, value if isinstance(equation_def, string_types): - return modelseed.parse_reaction(equation_def).normalized() + return _REACTION_PARSER.parse(equation_def).normalized() compartment = equation_def.get('compartment', None) reversible = bool(equation_def.get('reversible', True)) @@ -459,7 +461,7 @@ def parse_reaction_table_file(path, f): props = {key: value for key, value in iteritems(row) if value != ''} if 'equation' in props: - props['equation'] = modelseed.parse_reaction(props['equation']) + props['equation'] = _REACTION_PARSER.parse(props['equation']) mark = FileMark(context, lineno + 2, 0) yield ReactionEntry(row['id'], props, mark) diff --git a/psamm/datasource/reaction.py b/psamm/datasource/reaction.py new file mode 100644 index 00000000..541b05ce --- /dev/null +++ b/psamm/datasource/reaction.py @@ -0,0 +1,255 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2014-2015 Jon Lund Steffensen + +"""Reaction parser.""" + +import re +from decimal import Decimal + +from six import raise_from + +from psamm.reaction import Reaction, Compound +from psamm.expression import affine + + +class ParseError(Exception): + """Error raised when parsing reaction fails.""" + + def __init__(self, *args, **kwargs): + self._span = kwargs.pop('span', None) + super(ParseError, self).__init__(*args, **kwargs) + + @property + def indicator(self): + if self._span is None: + return None + pre = ' ' * self._span[0] + ind = '^' * max(1, self._span[1] - self._span[0]) + return pre + ind + + +class ReactionParser(object): + """Parser of reactions equations. + + This parser recognizes: + + * Global compartment specification as a prefix (when ``parse_global`` is + ``True``) + + * Configurable reaction arrow tokens (``arrows``) + + * Compounds quoted by pipe (``|``) (required only if the compound name + includes a space) + + * Compound counts that are affine expressions. + """ + + SPACE_TOKEN = object() + QUOTED_TOKEN = object() + GROUP_TOKEN = object() + PLUS_TOKEN = object() + ARROW_TOKEN = object() + OTHER_TOKEN = object() + END_TOKEN = object() + + def __init__(self, arrows=None, parse_global=False): + if arrows is None: + arrows = ( + ('<=>', Reaction.Bidir), + ('=>', Reaction.Right), + ('<=', Reaction.Left) + ) + + arrow_p = '|'.join(re.escape(arrow) for arrow, _ in arrows) + self._arrows = dict(arrows) + self._tokens = ( + (r'\s+', ReactionParser.SPACE_TOKEN), + (r'\|[^|]+\|', ReactionParser.QUOTED_TOKEN), + (r'\([^)]+\)(?=\Z|\s|\|)', ReactionParser.GROUP_TOKEN), + (r'\+(?=\s)', ReactionParser.PLUS_TOKEN), + (arrow_p + r'(?=\Z|\s)', ReactionParser.ARROW_TOKEN), + (r'\S+', ReactionParser.OTHER_TOKEN), + (r'\Z', ReactionParser.END_TOKEN), + (r'.', None) + ) + + self._scanner = re.compile( + '|'.join('(' + pattern + ')' for pattern, _ in self._tokens), + re.DOTALL | re.UNICODE) + + self._parse_global = parse_global + + def parse(self, s): + """Parse reaction string.""" + + global_comp = None + if self._parse_global: + # Split by colon for global compartment information + m = re.match(r'^\s*\[(\w+)\]\s*:\s*(.*)', s) + if m is not None: + global_comp = m.group(1) + s = m.group(2) + + expect_operator = False + direction = None + left = [] + right = [] + current_side = left + saved_token = None + + def tokenize(): + for match in re.finditer(self._scanner, s): + for i, group in enumerate(match.groups()): + if group is not None: + token = self._tokens[i][1] + span = match.start(), match.end() + if token is None: + raise ParseError( + 'Invalid token in expression string:' + ' {!r}'.format(group), span=span) + yield self._tokens[i][1], group, span + break + + for token, value, span in tokenize(): + # Handle partially parsed compound. + if saved_token is not None and ( + token in (ReactionParser.PLUS_TOKEN, + ReactionParser.ARROW_TOKEN, + ReactionParser.END_TOKEN)): + compound = parse_compound(saved_token, global_comp) + current_side.append((compound, 1)) + expect_operator = True + saved_token = None + + if token == ReactionParser.PLUS_TOKEN: + # Compound separator. Expect compound name or compound count + # next. + if not expect_operator: + raise ParseError('Unexpected token: {!r}'.format(value), + span=span) + expect_operator = False + elif token == ReactionParser.ARROW_TOKEN: + # Reaction arrow. Expect compound name or compound count next. + if direction is not None: + raise ParseError( + 'More than one equation arrow: {!r}'.format(value), + span=span) + + if not expect_operator and len(left) > 0: + raise ParseError('Unexpected token: {!r}'.format(value), + span=span) + + expect_operator = False + direction = self._arrows[value] + current_side = right + elif token == ReactionParser.GROUP_TOKEN: + # Compound count. Expect compound name next. + if expect_operator: + raise ParseError('Expected plus or arrow: {!r}'.format( + value), span=span) + if saved_token is not None: + raise ParseError('Expected compound name: {!r}'.format( + value), span=span) + saved_token = value + elif token == ReactionParser.QUOTED_TOKEN: + # Compound name. Expect operator next. + if expect_operator: + raise ParseError('Expected plus or arrow: {!r}'.format( + value), span=span) + if saved_token is not None: + try: + count = parse_compound_count(saved_token) + except ValueError as e: + raise_from(ParseError( + 'Unable to parse compound count: {!r}'.format( + saved_token), + span=span), e) + else: + count = 1 + compound = parse_compound(value, global_comp) + current_side.append((compound, count)) + expect_operator = True + saved_token = None + elif token == ReactionParser.OTHER_TOKEN: + # Could be count or compound name. Store and expect other, + # quoted, operator or end. + if expect_operator: + raise ParseError('Expected plus or arrow: {!r}'.format( + value), span=span) + if saved_token is not None: + try: + count = parse_compound_count(saved_token) + except ValueError as e: + raise_from(ParseError( + 'Unable to parse compound count: {!r}'.format( + saved_token), + span=span), e) + compound = parse_compound(value, global_comp) + current_side.append((compound, count)) + expect_operator = True + saved_token = None + else: + saved_token = value + + if direction is None: + raise ParseError('Missing equation arrow') + + return Reaction(direction, left, right) + + +_DEFAULT_PARSER = ReactionParser() + + +def parse_reaction(s): + """Parse reaction string using the default parser.""" + return _DEFAULT_PARSER.parse(s) + + +def parse_compound(s, global_compartment=None): + """Parse a compound specification. + + If no compartment is specified in the string, the global compartment + will be used. + """ + m = re.match(r'^\|(.*)\|$', s) + if m: + s = m.group(1) + + m = re.match(r'^(.+)\[(\S+)\]$', s) + if m: + compound_id = m.group(1) + compartment = m.group(2) + else: + compound_id = s + compartment = global_compartment + + return Compound(compound_id, compartment=compartment) + + +def parse_compound_count(s): + """Parse a compound count (number of compounds).""" + m = re.match(r'^\((.*)\)$', s) + if m: + s = m.group(1) + + for count_type in (int, Decimal, affine.Expression): + try: + return count_type(s) + except: + pass + + raise ValueError('Unable to parse compound count: {}'.format(s)) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 74905ef7..59fcaf1a 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -106,6 +106,13 @@ def __init__(self, reader, root): self._name = root.get('name') self._comp = root.get('compartment') + if self._comp is None: + msg = 'Species {} has no compartment!'.format(self.id) + if self._reader._strict: + raise ParseError(msg) + else: + logger.warning(msg) + self._boundary = root.get('boundaryCondition', 'false') == 'true' # In non-strict mode the species that ends with _b are considered diff --git a/psamm/fluxanalysis.py b/psamm/fluxanalysis.py index 7328e92a..8982cd7a 100644 --- a/psamm/fluxanalysis.py +++ b/psamm/fluxanalysis.py @@ -169,6 +169,22 @@ def maximize(self, reaction): self._prob.set_linear_objective(self.flux_expr(reaction)) self._solve() + def flux_bound(self, reaction, direction): + """Return the flux bound of the reaction. + + Direction must be a positive number to obtain the upper bound or a + negative number to obtain the lower bound. A value of inf or -inf is + returned if the problem is unbounded. + """ + try: + self.maximize({reaction: direction}) + except FluxBalanceError as e: + if not e.result.unbounded: + raise + return direction * _INF + else: + return self.get_flux(reaction) + def _add_minimization_vars(self): """Add variables and constraints for L1 norm minimization.""" if self._has_minimization_vars: @@ -316,14 +332,7 @@ def flux_variability(model, reactions, fixed, tfba, solver): def min_max_solve(reaction_id): for direction in (-1, 1): - try: - fba.maximize({reaction_id: direction}) - except FluxBalanceError as e: - if not e.result.unbounded: - raise - yield direction * _INF - else: - yield fba.get_flux(reaction_id) + yield fba.flux_bound(reaction_id, direction) # Solve for each reaction for reaction_id in reactions: diff --git a/psamm/tests/test_database.py b/psamm/tests/test_database.py index e209c3f4..d43b1911 100644 --- a/psamm/tests/test_database.py +++ b/psamm/tests/test_database.py @@ -20,7 +20,7 @@ from psamm.database import DictDatabase, ChainedDatabase from psamm.reaction import Compound, Reaction -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction class TestMetabolicDatabase(unittest.TestCase): diff --git a/psamm/tests/test_datasource_reaction.py b/psamm/tests/test_datasource_reaction.py new file mode 100644 index 00000000..d5403d0b --- /dev/null +++ b/psamm/tests/test_datasource_reaction.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2014-2015 Jon Lund Steffensen + +import unittest +from decimal import Decimal + +from psamm.reaction import Reaction, Compound +from psamm.datasource import reaction + + +class TestDefaultReactionParser(unittest.TestCase): + def test_reaction_parse(self): + r = reaction.parse_reaction( + '|H2O| + |PPi| => (2) |Phosphate| + (2) |H+|') + self.assertEqual(r, Reaction( + Reaction.Right, [(Compound('H2O'), 1), (Compound('PPi'), 1)], + [(Compound('Phosphate'), 2), (Compound('H+'), 2)])) + + def test_reaction_parse_with_decimal(self): + r = reaction.parse_reaction('|H2| + (0.5) |O2| => |H2O|') + self.assertEqual(r, Reaction( + Reaction.Right, + [(Compound('H2'), 1), (Compound('O2'), Decimal('0.5'))], + [(Compound('H2O'), 1)])) + + def test_reaction_parse_with_compartment(self): + r = reaction.parse_reaction('(2) |H2| + |O2| => (2) |H2O[e]|') + self.assertEqual(r, Reaction( + Reaction.Right, [(Compound('H2'), 2), (Compound('O2'), 1)], + [(Compound('H2O', compartment='e'), 2)])) + + def test_reaction_parse_with_multichar_compartment(self): + r = reaction.parse_reaction( + '(2) |H2[C_c]| + |O2[C_c]| => (2) |H2O[C_e]|') + self.assertEqual(r, Reaction( + Reaction.Right, + [(Compound('H2', compartment='C_c'), 2), + (Compound('O2', compartment='C_c'), 1)], + [(Compound('H2O', compartment='C_e'), 2)])) + + def test_reaction_parse_raw_compound_id(self): + r = reaction.parse_reaction('(2) cpd00001 => cpd00002') + self.assertEqual(r, Reaction( + Reaction.Right, [(Compound('cpd00001'), 2)], + [(Compound('cpd00002'), 1)])) + + def test_reaction_parse_raw_compound_id_with_typo(self): + r = reaction.parse_reaction('(2) cpd00001 => cdp00002') + self.assertEqual(r, Reaction( + Reaction.Right, [(Compound('cpd00001'), 2)], + [(Compound('cdp00002'), 1)])) + + def test_reaction_parse_raw_compound_id_in_compartment(self): + r = reaction.parse_reaction('(2) cpd00001 => cpd00002[e]') + self.assertEqual(r, Reaction( + Reaction.Right, [(Compound('cpd00001'), 2)], + [(Compound('cpd00002', 'e'), 1)])) + + def test_reaction_str(self): + r = Reaction(Reaction.Left, [(Compound('H2O'), 2)], + [(Compound('H2'), 2), (Compound('O2'), 1)]) + self.assertEqual(str(r), '(2) |H2O| <= (2) |H2| + |O2|') + + +if __name__ == '__main__': + unittest.main() diff --git a/psamm/tests/test_fastcore.py b/psamm/tests/test_fastcore.py index 528c073a..4e32918e 100755 --- a/psamm/tests/test_fastcore.py +++ b/psamm/tests/test_fastcore.py @@ -21,7 +21,7 @@ from psamm.metabolicmodel import MetabolicModel from psamm.database import DictDatabase from psamm import fastcore -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction from psamm.lpsolver import generic diff --git a/psamm/tests/test_fluxanalysis.py b/psamm/tests/test_fluxanalysis.py index dd83838c..69d978f1 100755 --- a/psamm/tests/test_fluxanalysis.py +++ b/psamm/tests/test_fluxanalysis.py @@ -21,7 +21,7 @@ from psamm.metabolicmodel import MetabolicModel from psamm.database import DictDatabase from psamm import fluxanalysis -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction from psamm.lpsolver import generic from six import itervalues diff --git a/psamm/tests/test_fluxcoupling.py b/psamm/tests/test_fluxcoupling.py index 7f27f498..3857e357 100644 --- a/psamm/tests/test_fluxcoupling.py +++ b/psamm/tests/test_fluxcoupling.py @@ -21,7 +21,7 @@ from psamm.metabolicmodel import MetabolicModel from psamm.database import DictDatabase from psamm import fluxcoupling -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction from psamm.lpsolver import generic diff --git a/psamm/tests/test_gapfill.py b/psamm/tests/test_gapfill.py index b9371f8f..c7bd5b2e 100644 --- a/psamm/tests/test_gapfill.py +++ b/psamm/tests/test_gapfill.py @@ -19,7 +19,7 @@ from psamm.database import DictDatabase from psamm.metabolicmodel import MetabolicModel -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction from psamm.lpsolver import generic from psamm.gapfill import gapfind, gapfill from psamm.reaction import Compound diff --git a/psamm/tests/test_massconsistency.py b/psamm/tests/test_massconsistency.py index 927e3625..29d394d6 100755 --- a/psamm/tests/test_massconsistency.py +++ b/psamm/tests/test_massconsistency.py @@ -21,7 +21,7 @@ from psamm.metabolicmodel import MetabolicModel from psamm.database import DictDatabase from psamm import massconsistency -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction from psamm.reaction import Compound from psamm.lpsolver import generic diff --git a/psamm/tests/test_metabolicmodel.py b/psamm/tests/test_metabolicmodel.py index 5c7f1ea1..f075ca21 100755 --- a/psamm/tests/test_metabolicmodel.py +++ b/psamm/tests/test_metabolicmodel.py @@ -21,7 +21,7 @@ from psamm.metabolicmodel import MetabolicModel, FlipableModelView from psamm.database import DictDatabase from psamm.reaction import Compound -from psamm.datasource.modelseed import parse_reaction +from psamm.datasource.reaction import parse_reaction class TestLoadMetabolicModel(unittest.TestCase): diff --git a/psamm/tests/test_modelseed.py b/psamm/tests/test_modelseed.py deleted file mode 100644 index 5af8320d..00000000 --- a/psamm/tests/test_modelseed.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python -# This file is part of PSAMM. -# -# PSAMM is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# PSAMM is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with PSAMM. If not, see . -# -# Copyright 2014-2015 Jon Lund Steffensen - -import unittest -from decimal import Decimal - -from psamm.reaction import Reaction, Compound -from psamm.datasource import modelseed - - -class TestModelSEED(unittest.TestCase): - def test_modelseed_parse(self): - r = modelseed.parse_reaction('|H2O| + |PPi| => (2) |Phosphate| + (2) |H+|') - self.assertEqual(r, Reaction(Reaction.Right, [(Compound('H2O'), 1), (Compound('PPi'), 1)], - [(Compound('Phosphate'), 2), (Compound('H+'), 2)])) - - def test_modelseed_parse_with_decimal(self): - r = modelseed.parse_reaction('|H2| + (0.5) |O2| => |H2O|') - self.assertEqual(r, Reaction(Reaction.Right, [(Compound('H2'), 1), (Compound('O2'), Decimal('0.5'))], - [(Compound('H2O'), 1)])) - - def test_modelseed_parse_with_compartment(self): - r = modelseed.parse_reaction('(2) |H2| + |O2| => (2) |H2O[e]|') - self.assertEqual(r, Reaction(Reaction.Right, [(Compound('H2'), 2), (Compound('O2'), 1)], - [(Compound('H2O', compartment='e'), 2)])) - - def test_modelseed_parse_with_multichar_compartment(self): - r = modelseed.parse_reaction('(2) |H2[C_c]| + |O2[C_c]| => (2) |H2O[C_e]|') - self.assertEqual(r, Reaction(Reaction.Right, [(Compound('H2', compartment='C_c'), 2), - (Compound('O2', compartment='C_c'), 1)], - [(Compound('H2O', compartment='C_e'), 2)])) - - def test_modelseed_parse_raw_compound_id(self): - r = modelseed.parse_reaction('(2) cpd00001 => cpd00002') - self.assertEqual(r, Reaction(Reaction.Right, - [(Compound('cpd00001'), 2)], - [(Compound('cpd00002'), 1)])) - - def test_modelseed_parse_raw_compound_id_with_typo(self): - r = modelseed.parse_reaction('(2) cpd00001 => cdp00002') - self.assertEqual(r, Reaction(Reaction.Right, - [(Compound('cpd00001'), 2)], - [(Compound('cdp00002'), 1)])) - - def test_modelseed_parse_raw_compound_id_in_compartment(self): - r = modelseed.parse_reaction('(2) cpd00001 => cpd00002[e]') - self.assertEqual(r, Reaction(Reaction.Right, - [(Compound('cpd00001'), 2)], - [(Compound('cpd00002', 'e'), 1)])) - - def test_modelseed_str(self): - r = Reaction(Reaction.Left, [(Compound('H2O'), 2)], [(Compound('H2'), 2), (Compound('O2'), 1)]) - self.assertEqual(str(r), '(2) |H2O| <= (2) |H2| + |O2|') - - -if __name__ == '__main__': - unittest.main() diff --git a/setup.py b/setup.py index dbefcf89..b7c9a8be 100755 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='psamm', - version='0.17', + version='0.18', description='PSAMM metabolic modeling tools', maintainer='Jon Lund Steffensen', maintainer_email='jon_steffensen@uri.edu',