diff --git a/NEWS.md b/NEWS.md index 600955ee..5b067d85 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +v0.22 (2016-07-01) +------------------ + +- Better unicode handling in commands. +- When running the `gapfill` command the epsilon parameter can now be + specified on the command line. +- When parsing reaction and compound entities from the YAML files, produce + better error messages when IDs are invalid. +- Work around a bug in Cplex that in rare causes a segmentation fault when a + linear programming problem is solved repeatedly. +- API: Add `fastgapfill` module which allows access to run the fastGapFill + algorithm. +- API: Add `randomsparse` module which allows access to generate a random + minimal model which satisfies the flux threshold of the objective reaction. + v0.21 (2016-06-09) ------------------ diff --git a/docs/api/fastgapfill.rst b/docs/api/fastgapfill.rst new file mode 100644 index 00000000..8ebdbe8c --- /dev/null +++ b/docs/api/fastgapfill.rst @@ -0,0 +1,6 @@ + +``psamm.fastgapfill`` -- FastGapFill algorithm +============================================== + +.. automodule:: psamm.fastgapfill + :members: diff --git a/docs/api/randomsparse.rst b/docs/api/randomsparse.rst new file mode 100644 index 00000000..0fd53d43 --- /dev/null +++ b/docs/api/randomsparse.rst @@ -0,0 +1,6 @@ + +``psamm.randomsparse`` -- Find a random minimal network of model reactions +=========================================================================== + +.. automodule:: psamm.randomsparse + :members: diff --git a/psamm/balancecheck.py b/psamm/balancecheck.py index da1cf7eb..9b74e530 100644 --- a/psamm/balancecheck.py +++ b/psamm/balancecheck.py @@ -16,6 +16,8 @@ # Copyright 2016 Jon Lund Steffensen # Copyright 2016 Chao Liu +from __future__ import unicode_literals + import operator import logging diff --git a/psamm/command.py b/psamm/command.py index f764d1e4..51413fd6 100644 --- a/psamm/command.py +++ b/psamm/command.py @@ -25,9 +25,10 @@ The :func:`.main` function is the entry point of command line interface. """ -from __future__ import division +from __future__ import division, unicode_literals import os +import sys import argparse import logging import abc @@ -35,7 +36,7 @@ import multiprocessing as mp import pkg_resources -from six import add_metaclass, iteritems, itervalues +from six import add_metaclass, iteritems, itervalues, text_type from . import __version__ as package_version from .datasource.native import NativeModel @@ -76,7 +77,7 @@ def __init__(self, model, args): name = self._model.get_name() if name is None: - name = str(self._model.context) + name = text_type(self._model.context) logger.info('Model: {}'.format(name)) version = util.git_try_describe(self._model.context.basepath) @@ -91,6 +92,17 @@ def init_parser(cls, parser): def run(self): """Execute command""" + def argument_error(self, msg): + """Raise error indicating error parsing an argument.""" + raise CommandError(msg) + + def fail(self, msg, exc=None): + """Exit command as a result of a failure.""" + logger.error(msg) + if exc is not None: + logger.debug('Command failure caused by exception!', exc_info=exc) + sys.exit(1) + class MetabolicMixin(object): """Mixin for commands that use a metabolic model representation.""" @@ -396,4 +408,4 @@ def main(command_class=None, args=None): try: command.run() except CommandError as e: - parser.error(str(e)) + parser.error(text_type(e)) diff --git a/psamm/commands/excelexport.py b/psamm/commands/excelexport.py index cc14ebd4..821dc3fc 100644 --- a/psamm/commands/excelexport.py +++ b/psamm/commands/excelexport.py @@ -19,7 +19,9 @@ import logging -from ..command import Command, CommandError +from six import text_type + +from ..command import Command try: import xlsxwriter @@ -40,8 +42,7 @@ def init_parser(cls, parser): def run(self): model = self._model if xlsxwriter is None: - raise CommandError( - 'Excel export requires the XlsxWriter python module') + self.fail('Excel export requires the XlsxWriter python module') workbook = xlsxwriter.Workbook(self._args.file) reaction_sheet = workbook.add_worksheet(name='Reactions') @@ -53,10 +54,11 @@ def run(self): key=lambda x: (x != 'id', x != 'equation', x)) for z, i in enumerate(property_list_sorted): - reaction_sheet.write_string(0, z, str(i)) + reaction_sheet.write_string(0, z, text_type(i)) for x, i in enumerate(model.parse_reactions()): for y, j in enumerate(property_list_sorted): - reaction_sheet.write_string(x+1, y, str(i.properties.get(j))) + reaction_sheet.write_string( + x+1, y, text_type(i.properties.get(j))) compound_sheet = workbook.add_worksheet(name='Compounds') @@ -69,10 +71,11 @@ def run(self): x != 'name', x)) for z, i in enumerate(compound_list_sorted): - compound_sheet.write_string(0, z, str(i)) + compound_sheet.write_string(0, z, text_type(i)) for x, i in enumerate(model.parse_compounds()): for y, j in enumerate(compound_list_sorted): - compound_sheet.write_string(x+1, y, str(i.properties.get(j))) + compound_sheet.write_string( + x+1, y, text_type(i.properties.get(j))) media_sheet = workbook.add_worksheet(name='Medium') @@ -82,8 +85,6 @@ def run(self): media_sheet.write_string(0, 3, 'Upper Limit') default_flux = model.get_default_flux_limit() - if default_flux is None: - default_flux = 1000 for x, (compound, reaction, lower, upper) in enumerate( model.parse_medium()): @@ -93,16 +94,16 @@ def run(self): if upper is None: upper = default_flux - media_sheet.write(x+1, 0, str(compound)) - media_sheet.write(x+1, 1, str(reaction)) - media_sheet.write(x+1, 2, str(lower)) - media_sheet.write(x+1, 3, str(upper)) + media_sheet.write(x+1, 0, text_type(compound)) + media_sheet.write(x+1, 1, text_type(reaction)) + media_sheet.write(x+1, 2, text_type(lower)) + media_sheet.write(x+1, 3, text_type(upper)) limits_sheet = workbook.add_worksheet(name='Limits') limits_sheet.write_string(0, 0, 'Reaction ID') limits_sheet.write_string(0, 1, 'Lower Limit') - limits_sheet.write_string(0, 2, str('Upper Limit')) + limits_sheet.write_string(0, 2, 'Upper Limit') for x, i in enumerate(model.parse_limits()): limits_sheet.write(x, 0, (i[0])) diff --git a/psamm/commands/fastgapfill.py b/psamm/commands/fastgapfill.py index 3e17ae57..470cdc4e 100644 --- a/psamm/commands/fastgapfill.py +++ b/psamm/commands/fastgapfill.py @@ -20,8 +20,9 @@ import argparse import logging -from ..command import Command, MetabolicMixin, SolverCommandMixin, CommandError +from ..command import Command, MetabolicMixin, SolverCommandMixin from .. import fastcore, fluxanalysis +from ..fastgapfill import create_extended_model, fastgapfill logger = logging.getLogger(__name__) @@ -69,18 +70,6 @@ def run(self): compound_name[compound.id] = ( compound.name if compound.name is not None else compound.id) - epsilon = self._args.epsilon - model_compartments = set(self._mm.compartments) - extra_comp = self._model.get_extracellular_compartment() - - # Add exchange and transport reactions to database - model_complete = self._mm.copy() - logger.info('Adding database, exchange and transport reactions') - db_added = model_complete.add_all_database_reactions( - model_compartments) - ex_added = model_complete.add_all_exchange_reactions(extra_comp) - tp_added = model_complete.add_all_transport_reactions(extra_comp) - # TODO: The exchange and transport reactions have tuple names. This # means that in Python 3 the reactions can no longer be directly # compared (e.g. while sorting) so define this helper function as a @@ -88,15 +77,8 @@ def run(self): def reaction_key(r): return r if isinstance(r, tuple) else (r,) - # Add penalty weights on reactions - weights = {} - if self._args.db_weight is not None: - weights.update((rxnid, self._args.db_weight) for rxnid in db_added) - if self._args.tp_weight is not None: - weights.update((rxnid, self._args.tp_weight) for rxnid in tp_added) - if self._args.ex_weight is not None: - weights.update((rxnid, self._args.ex_weight) for rxnid in ex_added) - + # Calculate penalty if penalty file exists + penalties = {} if self._args.penalty is not None: for line in self._args.penalty: line, _, comment = line.partition('#') @@ -104,36 +86,37 @@ def reaction_key(r): if line == '': continue rxnid, weight = line.split(None, 1) - weights[rxnid] = float(weight) + penalties[rxnid] = float(weight) - # Run Fastcore and print the induced reaction set - logger.info('Calculating Fastcore induced set on model') - core = set(self._mm.reactions) + model_extended, weights = create_extended_model( + self._model, + db_weight=self._args.db_weight, + ex_weight=self._args.ex_weight, + tp_weight=self._args.tp_weight, + penalties=penalties) - induced = fastcore.fastcore(model_complete, core, epsilon, - weights=weights, solver=solver) - logger.info('Result: |A| = {}, A = {}'.format(len(induced), induced)) - added_reactions = induced - core - logger.info('Extended: |E| = {}, E = {}'.format( - len(added_reactions), added_reactions)) + epsilon = self._args.epsilon + core = set(self._mm.reactions) + induced = fastgapfill(model_extended, core, weights=weights, + epsilon=epsilon, solver=solver) if self._args.reaction is not None: maximized_reaction = self._args.reaction else: maximized_reaction = self._model.get_biomass_reaction() if maximized_reaction is None: - raise CommandError('The maximized reaction was not specified') + self.argument_error( + 'The maximized reaction was not specified') if not self._mm.has_reaction(maximized_reaction): - raise CommandError( - 'The biomass reaction is not a valid model' - ' reaction: {}'.format(maximized_reaction)) + self.fail('The biomass reaction is not a valid model' + ' reaction: {}'.format(maximized_reaction)) logger.info('Flux balance on induced model maximizing {}'.format( maximized_reaction)) - model_induced = self._mm.copy() - for rxnid in induced: - model_induced.add_reaction(rxnid) + model_induced = model_extended.copy() + for rxnid in set(model_extended.reactions) - induced: + model_induced.remove_reaction(rxnid) for rxnid, flux in sorted(fluxanalysis.flux_balance( model_induced, maximized_reaction, tfba=enable_tfba, solver=solver), key=lambda x: (reaction_key(x[0]), x[1])): @@ -142,7 +125,7 @@ def reaction_key(r): if self._mm.has_reaction(rxnid): reaction_class = 'Model' weight = 0 - rx = model_complete.get_reaction(rxnid) + rx = model_induced.get_reaction(rxnid) rxt = rx.translated_compounds(lambda x: compound_name.get(x, x)) print('{}\t{}\t{}\t{}\t{}'.format( rxnid, reaction_class, weight, flux, rxt)) diff --git a/psamm/commands/fba.py b/psamm/commands/fba.py index bac8bfce..4c07ece7 100644 --- a/psamm/commands/fba.py +++ b/psamm/commands/fba.py @@ -20,7 +20,7 @@ import time import logging -from ..command import SolverCommandMixin, MetabolicMixin, Command, CommandError +from ..command import SolverCommandMixin, MetabolicMixin, Command from .. import fluxanalysis logger = logging.getLogger(__name__) @@ -65,11 +65,11 @@ def run(self): else: reaction = self._model.get_biomass_reaction() if reaction is None: - raise CommandError('The biomass reaction was not specified') + self.argument_error('The biomass reaction was not specified') if not self._mm.has_reaction(reaction): - raise CommandError('Specified reaction is not in model: {}'.format( - reaction)) + self.fail( + 'Specified reaction is not in model: {}'.format(reaction)) logger.info('Using {} as objective'.format(reaction)) @@ -84,8 +84,8 @@ def run(self): logger.info('Loop removal using thermodynamic constraints') result = self.run_tfba(reaction) else: - raise CommandError('Invalid loop constraint mode: {}'.format( - loop_removal)) + self.argument_error( + 'Invalid loop constraint mode: {}'.format(loop_removal)) optimum = None total_reactions = 0 diff --git a/psamm/commands/fluxcheck.py b/psamm/commands/fluxcheck.py index 31f7812b..85beb3b9 100644 --- a/psamm/commands/fluxcheck.py +++ b/psamm/commands/fluxcheck.py @@ -22,7 +22,7 @@ from itertools import product from ..command import (Command, MetabolicMixin, SolverCommandMixin, - ParallelTaskMixin, CommandError) + ParallelTaskMixin) from .. import fluxanalysis, fastcore logger = logging.getLogger(__name__) @@ -82,10 +82,9 @@ def run(self): enable_fastcore = self._args.fastcore if enable_tfba and enable_fastcore: - raise CommandError( + self.argument_error( 'Using Fastcore with thermodynamic constraints' ' is not supported!') - start_time = time.time() if enable_fastcore: diff --git a/psamm/commands/fluxcoupling.py b/psamm/commands/fluxcoupling.py index bf0ed748..577e5115 100644 --- a/psamm/commands/fluxcoupling.py +++ b/psamm/commands/fluxcoupling.py @@ -20,7 +20,7 @@ import logging from ..command import (Command, SolverCommandMixin, MetabolicMixin, - ParallelTaskMixin, CommandError) + ParallelTaskMixin) from .. import fluxanalysis, fluxcoupling logger = logging.getLogger(__name__) @@ -43,7 +43,7 @@ def run(self): max_reaction = self._model.get_biomass_reaction() if max_reaction is None: - raise CommandError('The biomass reaction was not specified') + self.fail('The biomass reaction was not specified') fba_fluxes = dict(fluxanalysis.flux_balance( self._mm, max_reaction, tfba=False, solver=solver)) diff --git a/psamm/commands/fva.py b/psamm/commands/fva.py index e2d6007e..6a67df3e 100644 --- a/psamm/commands/fva.py +++ b/psamm/commands/fva.py @@ -22,7 +22,7 @@ from itertools import product from ..command import (Command, SolverCommandMixin, MetabolicMixin, - ParallelTaskMixin, CommandError) + ParallelTaskMixin) from ..util import MaybeRelative from .. import fluxanalysis @@ -60,11 +60,11 @@ def run(self): else: reaction = self._model.get_biomass_reaction() if reaction is None: - raise CommandError('The biomass reaction was not specified') + self.argument_error('The biomass reaction was not specified') if not self._mm.has_reaction(reaction): - raise CommandError('Specified reaction is not in model: {}'.format( - reaction)) + self.fail( + 'Specified reaction is not in model: {}'.format(reaction)) enable_tfba = self._args.tfba if enable_tfba: diff --git a/psamm/commands/gapfill.py b/psamm/commands/gapfill.py index 0817ef08..9655f149 100644 --- a/psamm/commands/gapfill.py +++ b/psamm/commands/gapfill.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2016 Jon Lund Steffensen from __future__ import unicode_literals @@ -21,10 +21,8 @@ from ..command import (Command, MetabolicMixin, SolverCommandMixin, FilePrefixAppendAction) - -from ..gapfill import gapfind, gapfill - -from psamm.datasource import reaction +from ..gapfill import gapfind, gapfill, GapFillError +from ..datasource import reaction logger = logging.getLogger(__name__) @@ -36,6 +34,9 @@ def init_parser(cls, parser): parser.add_argument( '--compound', metavar='compound', action=FilePrefixAppendAction, type=str, default=[], help='Select Compounds to GapFill') + parser.add_argument( + '--epsilon', type=float, default=1e-5, + help='Threshold for reaction flux') super(GapFillCommand, cls).init_parser(parser) def run(self): @@ -54,13 +55,19 @@ def run(self): solver = self._get_solver(integer=True) extracellular_comp = self._model.get_extracellular_compartment() + epsilon = self._args.epsilon if len(self._args.compound) == 0: # Run GapFind on model - logger.info('Searching for blocked compounds') - result = gapfind(self._mm, solver=solver) - blocked = set(compound for compound in result - if compound.compartment != extracellular_comp) + logger.info('Searching for blocked compounds...') + result = gapfind(self._mm, solver=solver, epsilon=epsilon) + + try: + blocked = set(compound for compound in result + if compound.compartment != extracellular_comp) + except GapFillError as e: + self._log_epsilon_and_fail(epsilon, e) + if len(blocked) > 0: logger.info('Blocked compounds') for compound in blocked: @@ -83,8 +90,12 @@ def run(self): model_complete.add_all_transport_reactions(extracellular_comp) logger.info('Searching for reactions to fill gaps') - added_reactions, reversed_reactions = gapfill( - model_complete, core, blocked, solver=solver) + try: + added_reactions, reversed_reactions = gapfill( + model_complete, core, blocked, solver=solver, + epsilon=epsilon) + except GapFillError as e: + self._log_epsilon_and_fail(epsilon, e) for rxnid in added_reactions: rx = model_complete.get_reaction(rxnid) @@ -99,3 +110,9 @@ def run(self): print('{}\t{}\t{}'.format(rxnid, 'Reverse', rxt)) else: logger.info('No blocked compounds found') + + def _log_epsilon_and_fail(self, epsilon, exc): + msg = ('Finding blocked compounds failed with epsilon set to {}. Try' + ' lowering the epsilon value to reduce artifical constraints on' + ' the model.'.format(epsilon)) + self.fail(msg, exc) diff --git a/psamm/commands/genedelete.py b/psamm/commands/genedelete.py index a9da1136..240a2c7e 100644 --- a/psamm/commands/genedelete.py +++ b/psamm/commands/genedelete.py @@ -22,7 +22,7 @@ import logging from ..command import (Command, MetabolicMixin, SolverCommandMixin, - CommandError, FilePrefixAppendAction) + FilePrefixAppendAction) from .. import fluxanalysis from ..expression import boolean @@ -48,7 +48,7 @@ def run(self): else: obj_reaction = self._model.get_biomass_reaction() if obj_reaction is None: - raise CommandError('The biomass reaction was not specified') + self.argument_error('The biomass reaction was not specified') genes = set() gene_assoc = {} diff --git a/psamm/commands/masscheck.py b/psamm/commands/masscheck.py index cb93af40..716c98b7 100644 --- a/psamm/commands/masscheck.py +++ b/psamm/commands/masscheck.py @@ -22,7 +22,7 @@ from six import iteritems -from ..command import (Command, CommandError, SolverCommandMixin, +from ..command import (Command, SolverCommandMixin, MetabolicMixin, FilePrefixAppendAction) from .. import massconsistency from ..reaction import Compound @@ -87,8 +87,8 @@ def run(self): elif self._args.type == 'reaction': self._check_reactions(known_inconsistent, zeromass, solver) else: - raise CommandError('Invalid type of check: {}'.format( - self._args.type)) + self.argument_error( + 'Invalid type of check: {}'.format(self._args.type)) def _check_compounds(self, known_inconsistent, zeromass, solver): logger.info('Checking stoichiometric consistency of compounds...') diff --git a/psamm/commands/randomsparse.py b/psamm/commands/randomsparse.py index 066de4bc..7d82159e 100644 --- a/psamm/commands/randomsparse.py +++ b/psamm/commands/randomsparse.py @@ -15,18 +15,14 @@ # # Copyright 2014-2015 Jon Lund Steffensen # Copyright 2015 Keith Dufault-Thompson - +# Copyright 2016 Chao Liu from __future__ import unicode_literals -import time -import random import logging -from ..command import Command, MetabolicMixin, SolverCommandMixin, CommandError +from ..command import Command, MetabolicMixin, SolverCommandMixin from .. import fluxanalysis, util -from ..expression import boolean - -from six import string_types +from .. import randomsparse logger = logging.getLogger(__name__) @@ -65,11 +61,11 @@ def run(self): else: reaction = self._model.get_biomass_reaction() if reaction is None: - raise CommandError('The biomass reaction was not specified') + self.argument_error('The biomass reaction was not specified') if not self._mm.has_reaction(reaction): - raise CommandError('Specified reaction is not in model: {}'.format( - reaction)) + self.fail( + 'Specified reaction is not in model: {}'.format(reaction)) if not self._args.tfba: solver = self._get_solver() @@ -91,173 +87,23 @@ def run(self): logger.info('Flux threshold for {} is {}'.format( reaction, flux_threshold)) - if self._args.type in ('reactions', 'exchange'): - self._delete_reactions(p, reaction, flux_threshold) - elif self._args.type == 'genes': - self._delete_genes(p, reaction, flux_threshold) - - def _delete_reactions(self, prob, obj_reaction, flux_threshold): - essential = {obj_reaction} if self._args.type == 'exchange': - reactions = set() - for reaction_id in self._mm.reactions: - if self._mm.is_exchange(reaction_id): - reactions.add(reaction_id) + strategy = randomsparse.ReactionDeletionStrategy( + self._mm, randomsparse.get_exchange_reactions(self._mm)) + entity = 'reactions' + elif self._args.type == 'reactions': + strategy = randomsparse.ReactionDeletionStrategy( + self._mm) + entity = 'reactions' else: - reactions = set(self._mm.reactions) - test_set = reactions - essential - deleted = set() - - start_time = time.time() - - while len(test_set) > 0: - testing_reaction = random.sample(test_set, 1)[0] - test_set.remove(testing_reaction) - - flux_var = prob.get_flux_var(testing_reaction) - c, = prob.prob.add_linear_constraints(flux_var == 0) - - logger.info('Trying FBA without reaction {}...'.format( - testing_reaction)) - - try: - prob.maximize(obj_reaction) - except fluxanalysis.FluxBalanceError: - logger.info( - 'FBA is infeasible, marking {} as essential'.format( - testing_reaction)) - c.delete() - essential.add(testing_reaction) - continue - - logger.debug('Reaction {} has flux {}'.format( - obj_reaction, prob.get_flux(obj_reaction))) - - if prob.get_flux(obj_reaction) < flux_threshold: - c.delete() - essential.add(testing_reaction) - logger.info('Reaction {} was essential'.format( - testing_reaction)) - else: - deleted.add(testing_reaction) - logger.info('Reaction {} was deleted'.format(testing_reaction)) - - logger.info('Solving took {:.2f} seconds'.format( - time.time() - start_time)) - - for reaction_id in sorted(reactions): - value = 0 if reaction_id in deleted else 1 - print('{}\t{}'.format(reaction_id, value)) - - logger.info('Essential reactions: {}/{}'.format( - len(essential), len(reactions))) - - def _delete_genes(self, prob, obj_reaction, flux_threshold): - genes = set() - gene_assoc = {} - for reaction in self._model.parse_reactions(): - assoc = None - if reaction.genes is None: - continue - elif isinstance(reaction.genes, string_types): - assoc = boolean.Expression(reaction.genes) - else: - variables = [boolean.Variable(g) for g in reaction.genes] - assoc = boolean.Expression(boolean.And(*variables)) - genes.update(v.symbol for v in assoc.variables) - gene_assoc[reaction.id] = assoc - - essential = set() - deleted = set() - test_set = set(genes) - reactions = set(self._mm.reactions) - - start_time = time.time() - - while len(test_set) > 0: - testing_gene = random.sample(test_set, 1)[0] - test_set.remove(testing_gene) - new_gene_assoc = {} - deleted_reactions = set() - - logger.info('Trying model without gene {}...'.format( - testing_gene)) - - for reaction in reactions: - if reaction not in gene_assoc: - continue - assoc = gene_assoc[reaction] - if boolean.Variable(testing_gene) in assoc.variables: - new_assoc = assoc.substitute( - lambda v: v if v.symbol != testing_gene else False) - if new_assoc.has_value() and not new_assoc.value: - deleted_reactions.add(reaction) - else: - new_gene_assoc[reaction] = new_assoc - else: - new_gene_assoc[reaction] = assoc - - if obj_reaction in deleted_reactions: - logger.info( - 'Marking gene {} as essential because the objective' - ' reaction depends on this gene...'.format(testing_gene)) - essential.add(testing_gene) - continue - - if len(deleted_reactions) == 0: - logger.info( - 'No reactions were removed when gene {}' - ' was deleted'.format(testing_gene)) - deleted.add(testing_gene) - gene_assoc = new_gene_assoc - continue - - logger.info( - 'Reactions removed when gene {} is deleted: {}'.format( - testing_gene, len(deleted_reactions))) - logger.info('Deleted reactions: {}'.format( - ', '.join(deleted_reactions))) - - constraints = [] - for reaction in deleted_reactions: - flux_var = prob.get_flux_var(reaction) - c, = prob.prob.add_linear_constraints(flux_var == 0) - constraints.append(c) - - try: - prob.maximize(obj_reaction) - except fluxanalysis.FluxBalanceError: - logger.info( - 'FBA is infeasible, marking {} as essential'.format( - testing_gene)) - for c in constraints: - c.delete() - essential.add(testing_gene) - continue - - logger.debug('Reaction {} has flux {}'.format( - obj_reaction, prob.get_flux(obj_reaction))) - - if prob.get_flux(obj_reaction) < flux_threshold: - for c in constraints: - c.delete() - essential.add(testing_gene) - logger.info('Gene {} was essential'.format(testing_gene)) - else: - deleted.add(testing_gene) - reactions.difference_update(deleted_reactions) - gene_assoc = new_gene_assoc - logger.info('Gene {} was deleted'.format(testing_gene)) - - logger.info('Solving took {:.2f} seconds'.format( - time.time() - start_time)) - - for gene_id in sorted(genes): - value = 0 if gene_id in deleted else 1 - print('{}\t{}'.format(gene_id, value)) + strategy = randomsparse.GeneDeletionStrategy( + self._mm, randomsparse.get_gene_associations(self._model)) + entity = 'genes' - logger.info('Essential genes: {}/{}'.format( - len(essential), len(genes))) + essential, deleted = randomsparse.random_sparse( + strategy, p, reaction, flux_threshold) - logger.info('Reactions in minimal network: {}'.format( - ', '.join(sorted(reactions)))) + logger.info('Essential {}: {}/{}'.format( + entity, len(essential), len(strategy.entities))) + logger.info('Deleted {}: {}/{}'.format( + entity, len(deleted), len(strategy.entities))) diff --git a/psamm/commands/robustness.py b/psamm/commands/robustness.py index 6c36bc29..9ac6f153 100644 --- a/psamm/commands/robustness.py +++ b/psamm/commands/robustness.py @@ -21,7 +21,7 @@ import logging from ..command import (Command, MetabolicMixin, SolverCommandMixin, - ParallelTaskMixin, CommandError) + ParallelTaskMixin) from .. import fluxanalysis from six.moves import range @@ -78,20 +78,20 @@ def run(self): else: reaction = self._model.get_biomass_reaction() if reaction is None: - raise CommandError('The biomass reaction was not specified') + self.argument_error('The biomass reaction was not specified') if not self._mm.has_reaction(reaction): - raise CommandError('Specified reaction is not in model: {}'.format( + self.fail('Specified biomass reaction is not in model: {}'.format( reaction)) varying_reaction = self._args.varying if not self._mm.has_reaction(varying_reaction): - raise CommandError('Specified reaction is not in model: {}'.format( + self.fail('Specified varying reaction is not in model: {}'.format( varying_reaction)) steps = self._args.steps if steps <= 0: - raise CommandError('Invalid number of steps: {}\n'.format(steps)) + self.argument_error('Invalid number of steps: {}\n'.format(steps)) loop_removal = self._args.loop_removal if loop_removal == 'tfba': @@ -106,8 +106,8 @@ def run(self): elif loop_removal == 'tfba': logger.info('Loop removal using thermodynamic constraints') else: - raise CommandError('Invalid loop constraint mode: {}'.format( - loop_removal)) + self.argument_error( + 'Invalid loop constraint mode: {}'.format(loop_removal)) p = fluxanalysis.FluxBalanceProblem(self._mm, solver) if loop_removal == 'tfba': @@ -127,7 +127,7 @@ def run(self): flux_min = self._args.minimum if flux_min > flux_max: - raise CommandError('Invalid flux range: {}, {}\n'.format( + self.argument_error('Invalid flux range: {}, {}\n'.format( flux_min, flux_max)) logger.info('Varying {} in {} steps between {} and {}'.format( diff --git a/psamm/commands/tableexport.py b/psamm/commands/tableexport.py index b01c0ec1..467f0dc7 100644 --- a/psamm/commands/tableexport.py +++ b/psamm/commands/tableexport.py @@ -19,6 +19,8 @@ import logging +from six import text_type + from ..command import Command logger = logging.getLogger(__name__) @@ -66,9 +68,9 @@ def _reaction_export(self): property_list_sorted = sorted( property_set, key=lambda x: (x != 'id', x != 'equation', x)) - print('\t'.join([str(x) for x in property_list_sorted])) + print('\t'.join([text_type(x) for x in property_list_sorted])) for i in self._model.parse_reactions(): - print('\t'.join(str(i.properties.get(property)) + print('\t'.join(text_type(i.properties.get(property)) for property in property_list_sorted)) def _compound_export(self): @@ -79,17 +81,15 @@ def _compound_export(self): compound_list_sorted = sorted( compound_set, key=lambda x: (x != 'id', x != 'name', x)) - print('\t'.join([str(x) for x in compound_list_sorted])) + print('\t'.join([text_type(x) for x in compound_list_sorted])) for compound in self._model.parse_compounds(): - print('\t'.join(str(compound.properties.get(property)) + print('\t'.join(text_type(compound.properties.get(property)) for property in compound_list_sorted)) def _media_export(self): print('{}\t{}\t{}\t{}'.format('Compound ID', 'Reaction ID', 'Lower Limit', 'Upper Limit')) default_flux = self._model.get_default_flux_limit() - if default_flux is None: - default_flux = 1000 for compound, reaction, lower, upper in self._model.parse_medium(): if lower is None: diff --git a/psamm/datasource/native.py b/psamm/datasource/native.py index a891bd32..3fd86f64 100644 --- a/psamm/datasource/native.py +++ b/psamm/datasource/native.py @@ -171,7 +171,7 @@ class NativeModel(object): The model is created from a model file or from a directory containing a model file using the default file name (model.yaml or model.yml). This file can specify the model fully or refer to other files within the same - directory subtree that specifes part of the model. + directory subtree that specifies part of the model. """ def __init__(self, path): @@ -211,7 +211,7 @@ def get_extracellular_compartment(self): def get_default_flux_limit(self): """Return the default flux limit specified by the model""" - return self._model.get('default_flux_limit', None) + return self._model.get('default_flux_limit', 1000) def parse_reactions(self): """Yield tuples of reaction ID and reactions defined in the model""" @@ -329,14 +329,40 @@ def context(self): return self._context +def _check_id(entity, entity_type): + """Check whether the ID is valid. + + First check if the ID is missing, and then check if it is a qualified + string type, finally check if the string is empty. For all checks, it + would raise a ParseError with the corresponding message. + + Args: + entity: a string type object to be checked. + entity_type: a string that shows the type of entities to check, usually + `Compound` or 'Reaction'. + """ + + if entity is None: + raise ParseError('{} ID missing'.format(entity_type)) + elif not isinstance(entity, string_types): + msg = '{} ID must be a string, id was {}.'.format(entity_type, entity) + if isinstance(entity, bool): + msg += (' You may have accidentally used an ID value that YAML' + ' interprets as a boolean, such as "yes", "no", "on",' + ' "off", "true" or "false". To use this ID, you have to' + ' quote it with single or double quotes') + raise ParseError(msg) + elif len(entity) == 0: + raise ParseError('{} ID must not be empty'.format(entity_type)) + + def parse_compound(compound_def, context=None): """Parse a structured compound definition as obtained from a YAML file Returns a CompoundEntry.""" compound_id = compound_def.get('id') - if compound_id is None: - raise ParseError('Compound ID missing') + _check_id(compound_id, 'Compound') mark = FileMark(context, None, None) return CompoundEntry(compound_id, compound_def, mark) @@ -430,8 +456,7 @@ def parse_compound_list(l): """Parse a list of reactants or metabolites""" for compound_def in l: compound_id = compound_def.get('id') - if compound_id is None: - raise ParseError('Compound ID missing') + _check_id(compound_id, 'Compound') value = compound_def.get('value') if value is None: @@ -466,8 +491,7 @@ def parse_reaction(reaction_def, context=None): """ reaction_id = reaction_def.get('id') - if reaction_id is None: - raise ParseError('Reaction ID missing') + _check_id(reaction_id, 'Reaction') reaction_props = dict(reaction_def) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index a480a440..34f23f2a 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -26,7 +26,7 @@ from itertools import count import logging -from six import itervalues, iteritems +from six import itervalues, iteritems, text_type from ..reaction import Reaction, Compound, Direction @@ -58,7 +58,7 @@ def _tag(tag, namespace=None): """Prepend namespace to tag name""" if namespace is None: - return str(tag) + return text_type(tag) return '{{{}}}{}'.format(namespace, tag) @@ -672,9 +672,9 @@ def write_model(self, file, model, reactions, compounds): model_tag = ET.SubElement(root, self._sbml_tag('model')) # Generators of unique IDs - compound_id = ('M_'+str(i) for i in count(1)) - compartment_id = ('C_'+str(i) for i in count(1)) - reaction_id = ('R_'+str(i) for i in count(1)) + compound_id = ('M_{}'.format(i) for i in count(1)) + compartment_id = ('C_{}'.format(i) for i in count(1)) + reaction_id = ('R_{}'.format(i) for i in count(1)) # Build mapping from Compound to species ID model_compartments = {} @@ -694,7 +694,7 @@ def write_model(self, file, model, reactions, compounds): compartment_tag = ET.SubElement( compartments, self._sbml_tag('compartment')) compartment_tag.set(self._sbml_tag('id'), compartment_id) - compartment_tag.set(self._sbml_tag('name'), str(compartment)) + compartment_tag.set(self._sbml_tag('name'), text_type(compartment)) # Create list of species species_list = ET.SubElement( @@ -705,7 +705,8 @@ def write_model(self, file, model, reactions, compounds): species_tag.set(self._sbml_tag('id'), species_id) species_tag.set( self._sbml_tag('name'), - str(species.translate(lambda x: compound_name.get(x, x)))) + text_type( + species.translate(lambda x: compound_name.get(x, x)))) species_tag.set( self._sbml_tag('compartment'), model_compartments[species.compartment]) @@ -731,7 +732,8 @@ def write_model(self, file, model, reactions, compounds): dest_list, self._sbml_tag('speciesReference')) spec_ref.set( self._sbml_tag('species'), model_species[compound]) - spec_ref.set(self._sbml_tag('stoichiometry'), str(abs(value))) + spec_ref.set( + self._sbml_tag('stoichiometry'), text_type(abs(value))) notes_tag = ET.SubElement(reaction_tag, self._sbml_tag('notes')) if reaction in reaction_genes: @@ -747,15 +749,18 @@ def write_model(self, file, model, reactions, compounds): ci_tag.text = 'FLUX_VALUE' param_list = ET.SubElement( kl_tag, self._sbml_tag('listOfParameters')) + + lower_str = text_type(model.limits[reaction].lower) + upper_str = text_type(model.limits[reaction].upper) ET.SubElement(param_list, self._sbml_tag('parameter'), { self._sbml_tag('id'): 'LOWER_BOUND', self._sbml_tag('name'): 'LOWER_BOUND', - self._sbml_tag('value'): str(model.limits[reaction].lower) + self._sbml_tag('value'): lower_str }) ET.SubElement(param_list, self._sbml_tag('parameter'), { self._sbml_tag('id'): 'UPPER_BOUND', self._sbml_tag('name'): 'UPPER_BOUND', - self._sbml_tag('value'): str(model.limits[reaction].upper) + self._sbml_tag('value'): upper_str }) tree = ET.ElementTree(root) diff --git a/psamm/fastgapfill.py b/psamm/fastgapfill.py new file mode 100644 index 00000000..ef3d83bd --- /dev/null +++ b/psamm/fastgapfill.py @@ -0,0 +1,102 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2016 Chao Liu + +from __future__ import unicode_literals + +import logging + +from six import iteritems +from .fastcore import fastcore + +logger = logging.getLogger(__name__) + + +def create_extended_model(model, db_weight=None, ex_weight=None, + tp_weight=None, penalties=None): + """Create an extended model for FastGapFill algorithm. + + Create a :class:`psamm.metabolicmodel.MetabolicModel` with + all reactions added (the reaction database in the model is taken + to be the universal database) and also with artificial exchange + and transport reactions added. Return the extended + :class:`psamm.metabolicmodel.MetabolicModel` + and a weight dictionary for added reactions in that model. + + Args: + model: :class:`psamm.datasource.native.NativeModel`. + db_weight: weight(float) for database reactions, default is `None`. + ex_weight: weight(float) for exchange reactions, default is `None`. + tb_weigth: weight(float) for transport reactions, default is `None`. + penalties: a dictionary of penalty scores for database reactions. + """ + + # Create metabolic model + model_extended = model.create_metabolic_model() + model_compartments = set(model_extended.compartments) + extra_compartments = model.get_extracellular_compartment() + + # Add exchange and transport reactions to database + logger.info('Adding database, exchange and transport reactions') + db_added = model_extended.add_all_database_reactions(model_compartments) + ex_added = model_extended.add_all_exchange_reactions( + extra_compartments, allow_duplicates=True) + tp_added = model_extended.add_all_transport_reactions( + extra_compartments, allow_duplicates=True) + + # Add penalty weights on reactions + weights = {} + if db_weight is not None: + weights.update((rxnid, db_weight) for rxnid in db_added) + if tp_weight is not None: + weights.update((rxnid, tp_weight) for rxnid in tp_added) + if ex_weight is not None: + weights.update((rxnid, ex_weight) for rxnid in ex_added) + + if penalties is not None: + for rxnid, weight in iteritems(penalties): + weights[rxnid] = weight + return model_extended, weights + + +def fastgapfill(model_extended, core, solver, weights={}, epsilon=1e-5): + """Run FastGapFill gap-filling algorithm by calling + :func:`psamm.fastcore.fastcore`. + + FastGapFill will try to find a minimum subset of reactions that includes + the core reactions and it also has no blocked reactions. + Return the set of reactions in the minimum subset. An extended model that + includes artificial transport and exchange reactions can be generated by + calling :func:`.create_extended_model`. + + Args: + model: :class:`psamm.metabolicmodel.MetabolicModel`. + core: reactions in the original metabolic model. + weights: a weight dictionary for reactions in the model. + solver: linear programming library to use. + epsilon: float number, threshold for Fastcore algorithm. + """ + + # Run Fastcore and print the induced reaction set + logger.info('Calculating Fastcore induced set on model') + induced = fastcore( + model_extended, core, epsilon=1e-5, weights=weights, solver=solver) + logger.info('Result: |A| = {}, A = {}'.format(len(induced), induced)) + added_reactions = induced - core + logger.info('Extended: |E| = {}, E = {}'.format( + len(added_reactions), added_reactions)) + return induced diff --git a/psamm/fluxanalysis.py b/psamm/fluxanalysis.py index e2d767df..d497b2cc 100644 --- a/psamm/fluxanalysis.py +++ b/psamm/fluxanalysis.py @@ -17,6 +17,8 @@ """Implementation of Flux Balance Analysis.""" +from __future__ import unicode_literals + import logging import random diff --git a/psamm/formula.py b/psamm/formula.py index 2849fa3a..36af2845 100644 --- a/psamm/formula.py +++ b/psamm/formula.py @@ -79,13 +79,50 @@ def substitute(self, mapping): return self +class _AtomType(type): + """Metaclass that gives the Atom class properties for each element. + + A class based on this metaclass (i.e. :class:`.Atom`) will have singleton + elements with each name, and will have a property for each element which + contains the instance for that element. + """ + + _ELEMENTS = set([ + 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', + 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', + 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', + 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', + 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', + 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', + 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', + 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', + 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', + 'Ds', 'Rg', 'Cn', 'Uut', 'Fl', 'Uup', 'Lv', 'Uus', 'Uuo' + ]) + + _instances = {} + + def __call__(self, name, *args, **kwargs): + instances = _AtomType._instances.setdefault(self, {}) + if name not in instances: + instances[name] = super(_AtomType, self).__call__( + name, *args, **kwargs) + return instances[name] + + def __getattribute__(self, name): + if name in _AtomType._ELEMENTS: + return self(name) + return super(_AtomType, self).__getattribute__(name) + + @six.python_2_unicode_compatible @functools.total_ordering +@six.add_metaclass(_AtomType) class Atom(FormulaElement): """Represent an atom in a chemical formula - >>> hydrogen = Atom('H') - >>> oxygen = Atom('O') + >>> hydrogen = Atom.H + >>> oxygen = Atom.O >>> str(oxygen | 2*hydrogen) 'H2O' """ @@ -97,7 +134,7 @@ def __init__(self, symbol): def symbol(self): """Atom symbol - >>> Atom('H').symbol + >>> Atom.H.symbol 'H' """ @@ -164,7 +201,7 @@ class Formula(FormulaElement): This is represented as a number of :class:`FormulaElements <.FormulaElement>` with associated counts. - >>> f = Formula({Atom('C'): 6, Atom('H'): 12, Atom('O'): 6}) + >>> f = Formula({Atom.C: 6, Atom.H: 12, Atom.O: 6}) >>> str(f) 'C6H12O6' """ @@ -235,7 +272,7 @@ def __contains__(self, element): def __str__(self): """Return formula represented using Hill notation system - >>> str(Formula({Atom('C'): 6, Atom('H'): 12, Atom('O'): 6})) + >>> str(Formula({Atom.C: 6, Atom.H: 12, Atom.O: 6})) 'C6H12O6' """ @@ -249,13 +286,13 @@ def element_sort_key(pair): else: return 2, None - if Atom('C') in values: - yield Atom('C'), values[Atom('C')] - if Atom('H') in values: - yield Atom('H'), values[Atom('H')] + if Atom.C in values: + yield Atom.C, values[Atom.C] + if Atom.H in values: + yield Atom.H, values[Atom.H] for element, value in sorted( iteritems(values), key=element_sort_key): - if element not in (Atom('C'), Atom('H')): + if element not in (Atom.C, Atom.H): yield element, value else: for element, value in sorted( diff --git a/psamm/lpsolver/cplex.py b/psamm/lpsolver/cplex.py index 74e245c3..87160968 100644 --- a/psamm/lpsolver/cplex.py +++ b/psamm/lpsolver/cplex.py @@ -106,6 +106,7 @@ def __init__(self, **kwargs): self._non_zero_objective = set() self._result = None + self._solve_count = 0 @property def cplex(self): @@ -265,39 +266,48 @@ def set_objective_sense(self, sense): def _reset_problem_type(self): """Reset problem type to whatever is appropriate.""" - integer_count = 0 - for func in (self._cp.variables.get_num_binary, - self._cp.variables.get_num_integer, - self._cp.variables.get_num_semicontinuous, - self._cp.variables.get_num_semiinteger): - integer_count += func() - - integer = integer_count > 0 - quad_constr = self._cp.quadratic_constraints.get_num() > 0 - quad_obj = self._cp.objective.get_num_quadratic_variables() > 0 - if not integer: - if quad_constr: - new_type = self._cp.problem_type.QCP - elif quad_obj: - new_type = self._cp.problem_type.QP - else: - new_type = self._cp.problem_type.LP - else: - if quad_constr: - new_type = self._cp.problem_type.MIQCP - elif quad_obj: - new_type = self._cp.problem_type.MIQP + # Only need to reset the type after the first solve. This also works + # around a bug in Cplex where get_num_binary() is some rare cases + # causes a segfault. + if self._solve_count > 0: + integer_count = 0 + for func in (self._cp.variables.get_num_binary, + self._cp.variables.get_num_integer, + self._cp.variables.get_num_semicontinuous, + self._cp.variables.get_num_semiinteger): + integer_count += func() + + integer = integer_count > 0 + quad_constr = self._cp.quadratic_constraints.get_num() > 0 + quad_obj = self._cp.objective.get_num_quadratic_variables() > 0 + + if not integer: + if quad_constr: + new_type = self._cp.problem_type.QCP + elif quad_obj: + new_type = self._cp.problem_type.QP + else: + new_type = self._cp.problem_type.LP else: - new_type = self._cp.problem_type.MILP + if quad_constr: + new_type = self._cp.problem_type.MIQCP + elif quad_obj: + new_type = self._cp.problem_type.MIQP + else: + new_type = self._cp.problem_type.MILP - logger.debug('Setting problem type to {}...'.format( - self._cp.problem_type[new_type])) - self._cp.set_problem_type(new_type) + logger.debug('Setting problem type to {}...'.format( + self._cp.problem_type[new_type])) + self._cp.set_problem_type(new_type) + else: + logger.debug('Problem type is {}'.format( + self._cp.problem_type[self._cp.get_problem_type()])) # Force QP/MIQP solver to look for global optimum. We set it here only # for QP/MIQP problems to avoid the warnings generated for other # problem types when this parameter is set. + quad_obj = self._cp.objective.get_num_quadratic_variables() > 0 if quad_obj: self._cp.parameters.solutiontarget.set( self._cp.parameters.solutiontarget.values.optimal_global) @@ -308,6 +318,7 @@ def _reset_problem_type(self): def _solve(self): self._reset_problem_type() self._cp.solve() + self._solve_count += 1 if (self._cp.solution.get_status() == self._cp.solution.status.abort_user): raise KeyboardInterrupt() diff --git a/psamm/metabolicmodel.py b/psamm/metabolicmodel.py index 1aedbf61..188d5d9c 100644 --- a/psamm/metabolicmodel.py +++ b/psamm/metabolicmodel.py @@ -17,6 +17,8 @@ """Representation of metabolic network models.""" +from __future__ import unicode_literals + from collections import Mapping from .database import MetabolicDatabase, StoichiometricMatrixView @@ -118,8 +120,8 @@ def __ne__(self, other): return not self == other def __repr__(self): - return '{}({}, {})'.format( - self.__class__.__name__, repr(self.lower), repr(self.upper)) + return str('{}({!r}, {!r})').format( + self.__class__.__name__, self.lower, self.upper) class LimitsView(Mapping): diff --git a/psamm/randomsparse.py b/psamm/randomsparse.py new file mode 100644 index 00000000..fa102ebc --- /dev/null +++ b/psamm/randomsparse.py @@ -0,0 +1,224 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2015 Keith Dufault-Thompson +# Copyright 2016 Chao Liu + +from __future__ import unicode_literals + +import random +import logging + +from . import fluxanalysis +from .expression import boolean + +from six import iteritems, string_types + +logger = logging.getLogger(__name__) + + +class ReactionDeletionStrategy(object): + """Deleting reactions strategy class. + + When initializing instances of this class, + :func:`.get_exchange_reactions` can be useful if exchange reactions + are used as the test set. + """ + + def __init__(self, model, reaction_set=None): + self._model = model + if reaction_set is None: + self._test_set = set(self._model.reactions) + else: + self._test_set = set(reaction_set) + self._reactions = set(self._test_set) + + @property + def entities(self): + return self._reactions + + def iter_tests(self): + while len(self._test_set) > 0: + reaction = random.choice(tuple(self._test_set)) + self._test_set.remove(reaction) + yield reaction, {reaction} + + def delete(self, entity, deleted_reactions): + pass + + +class GeneDeletionStrategy(object): + """Deleting genes strategy class. + + When initializing instances of this class, :func:`get_gene_associations` + can be called to obtain the gene association dict from the model. + """ + + def __init__(self, model, gene_assoc): + self._model = model + self._genes = set() + self._gene_assoc = dict(gene_assoc) + + for reaction, assoc in iteritems(self._gene_assoc): + self._genes.update(v.symbol for v in assoc.variables) + + self._reactions = set(self._model.reactions) + self._test_set = set(self._genes) + + @property + def entities(self): + return set(self._genes) + + def iter_tests(self): + while len(self._test_set) > 0: + gene = random.choice(tuple(self._test_set)) + self._test_set.remove(gene) + + deleted_reactions = set() + self._new_gene_assoc = {} + for reaction in self._reactions: + if reaction not in self._gene_assoc: + continue + assoc = self._gene_assoc[reaction] + if boolean.Variable(gene) in assoc.variables: + new_assoc = assoc.substitute( + lambda v: v if v.symbol != gene else False) + if new_assoc.has_value() and not new_assoc.value: + deleted_reactions.add(reaction) + else: + self._new_gene_assoc[reaction] = new_assoc + else: + self._new_gene_assoc[reaction] = assoc + + yield gene, deleted_reactions + + def delete(self, entity, deleted_reactions): + self._reactions -= deleted_reactions + self._gene_assoc = self._new_gene_assoc + + +def get_gene_associations(model): + """Create gene association for class :class:`.GeneDeletionStrategy`. + + Return a dict mapping reaction IDs to + :class:`psamm.expression.boolean.Expression` objects, + representing relationships between reactions and related genes. This helper + function should be called when creating :class:`.GeneDeletionStrategy` + objects. + + Args: + model: :class:`psamm.datasource.native.NativeModel`. + """ + + for reaction in model.parse_reactions(): + assoc = None + if reaction.genes is None: + continue + elif isinstance(reaction.genes, string_types): + assoc = boolean.Expression(reaction.genes) + else: + variables = [boolean.Variable(g) for g in reaction.genes] + assoc = boolean.Expression(boolean.And(*variables)) + yield reaction.id, assoc + + +def get_exchange_reactions(model): + """Yield IDs of all exchange reactions from model. + + This helper function would be useful when creating + :class:`.ReactionDeletionStrategy` objects. + + Args: + model: :class:`psamm.metabolicmodel.MetabolicModel`. + """ + for reaction_id in model.reactions: + if model.is_exchange(reaction_id): + yield reaction_id + + +def random_sparse(strategy, prob, obj_reaction, flux_threshold): + """Find a random minimal network of model reactions. + + Given a reaction to optimize and a threshold, delete entities randomly + until the flux of the reaction to optimize falls under the threshold. + Keep deleting until no more entities can be deleted. It works + with two strategies: deleting reactions or deleting genes (reactions + related to certain genes). + + Args: + strategy: :class:`.ReactionDeletionStrategy` or + :class:`.GeneDeletionStrategy`. + prob: :class:`psamm.fluxanalysis.FluxBalanceProblem`. + obj_reaction: objective reactions to optimize. + flux_threshold: threshold of max reaction flux. + """ + + essential = set() + deleted = set() + for entity, deleted_reactions in strategy.iter_tests(): + if obj_reaction in deleted_reactions: + logger.info( + 'Marking entity {} as essential because the objective' + ' reaction depends on this entity...'.format(entity)) + essential.add(entity) + continue + + if len(deleted_reactions) == 0: + logger.info( + 'No reactions were removed when entity {}' + ' was deleted'.format(entity)) + deleted.add(entity) + strategy.delete(entity, deleted_reactions) + continue + + logger.info('Deleted reactions: {}'.format( + ', '.join(deleted_reactions))) + + constr = [] + for r in deleted_reactions: + flux_var = prob.get_flux_var(r) + c, = prob.prob.add_linear_constraints(flux_var == 0) + constr.append(c) + + logger.info('Trying FBA without reactions {}...'.format( + deleted_reactions)) + + try: + prob.maximize(obj_reaction) + except fluxanalysis.FluxBalanceError: + logger.info( + 'FBA is infeasible, marking {} as essential'.format( + entity)) + for c in constr: + c.delete() + essential.add(entity) + continue + + logger.debug('Reaction {} has flux {}'.format( + obj_reaction, prob.get_flux(obj_reaction))) + + if prob.get_flux(obj_reaction) < flux_threshold: + for c in constr: + c.delete() + essential.add(entity) + logger.info('Entity {} was essential'.format( + entity)) + else: + deleted.add(entity) + strategy.delete(entity, deleted_reactions) + logger.info('Entity {} was deleted'.format(entity)) + + return essential, deleted diff --git a/psamm/reaction.py b/psamm/reaction.py index 9da74845..7a3f2b88 100644 --- a/psamm/reaction.py +++ b/psamm/reaction.py @@ -115,7 +115,8 @@ def __str__(self): """ s = self._name if len(self._arguments) > 0: - s += '({})'.format(', '.join(str(a) for a in self._arguments)) + s += '({})'.format( + ', '.join(text_type(a) for a in self._arguments)) if self._compartment is not None: s += '[{}]'.format(self._compartment) return s @@ -336,7 +337,7 @@ def __str__(self): # Use the same format as ModelSEED def format_compound(compound, count): """Format compound""" - cpdspec = str(compound) + cpdspec = text_type(compound) if count != 1: return '({}) |{}|'.format(count, cpdspec) return '|{}|'.format(cpdspec) diff --git a/psamm/tests/test_command.py b/psamm/tests/test_command.py index c1199eb5..e2c4aa55 100644 --- a/psamm/tests/test_command.py +++ b/psamm/tests/test_command.py @@ -15,8 +15,11 @@ # # Copyright 2015 Jon Lund Steffensen +from __future__ import unicode_literals + import sys import os +import codecs import shutil import tempfile from contextlib import contextmanager @@ -24,8 +27,10 @@ from six import StringIO, BytesIO -from psamm.command import main, Command, MetabolicMixin, SolverCommandMixin +from psamm.command import (main, Command, MetabolicMixin, SolverCommandMixin, + CommandError) from psamm.lpsolver import generic +from psamm.datasource.native import NativeModel @contextmanager @@ -83,32 +88,34 @@ def run(self): class TestCommandMain(unittest.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() - with open(os.path.join(self._model_dir, 'model.yaml'), 'w') as f: + path = os.path.join(self._model_dir, 'model.yaml') + with codecs.open(path, 'w', 'utf-8') as f: f.write('''--- name: Test model biomass: rxn_1 reactions: - id: rxn_1 - equation: '|A[e]| => |B[c]|' + equation: '|A_\u2206[e]| => |B[c]|' genes: - gene_1 - gene_2 - - id: rxn_2 + - id: rxn_2_\u03c0 equation: '|B[c]| => |C[e]|' genes: 'gene_3 or (gene_4 and gene_5)' compounds: - - id: A + - id: A_\u2206 - id: B - id: C media: - compartment: e compounds: - - id: A + - id: A_\u2206 - id: C limits: - - reaction: rxn_2 + - reaction: rxn_2_\u03c0 upper: 100 ''') + self._model = NativeModel(self._model_dir) def tearDown(self): shutil.rmtree(self._model_dir) @@ -240,7 +247,7 @@ def test_run_randomsparse_exchange(self): def test_run_robustness(self): self.run_solver_command([ - '--model', self._model_dir, 'robustness', 'rxn_2'], {}) + '--model', self._model_dir, 'robustness', 'rxn_2_\u03c0'], {}) def test_run_sbmlexport(self): with redirected_stdout(BytesIO()): @@ -264,8 +271,8 @@ def test_run_tableexport_reactions(self): self.assertEqual(f.getvalue(), '\n'.join([ 'id\tequation\tgenes', - "rxn_1\t|A[e]| => |B[c]|\t['gene_1', 'gene_2']", - 'rxn_2\t|B[c]| => |C[e]|\tgene_3 or (gene_4 and gene_5)', + "rxn_1\t|A_\u2206[e]| => |B[c]|\t['gene_1', 'gene_2']", + 'rxn_2_\u03c0\t|B[c]| => |C[e]|\tgene_3 or (gene_4 and gene_5)', '' ])) @@ -275,7 +282,7 @@ def test_run_tableexport_compounds(self): '--model', self._model_dir, 'tableexport', 'compounds']) self.assertEqual(f.getvalue(), '\n'.join([ - 'id', 'A', 'B', 'C', '' + 'id', 'A_\u2206', 'B', 'C', '' ])) def test_run_tableexport_medium(self): @@ -285,7 +292,7 @@ def test_run_tableexport_medium(self): self.assertEqual(f.getvalue(), '\n'.join([ 'Compound ID\tReaction ID\tLower Limit\tUpper Limit', - 'A[e]\tNone\t-1000\t1000', + 'A_\u2206[e]\tNone\t-1000\t1000', 'C[e]\tNone\t-1000\t1000', '' ])) @@ -297,7 +304,7 @@ def test_run_tableexport_limits(self): self.assertEqual(f.getvalue(), '\n'.join([ 'Reaction ID\tLower Limits\tUpper Limits', - 'rxn_2\tNone\t100', + 'rxn_2_\u03c0\tNone\t100', '' ])) @@ -309,7 +316,7 @@ def test_run_tableexport_metadata(self): self.assertEqual(f.getvalue(), '\n'.join([ 'Model Name: Test model', 'Biomass Reaction: rxn_1', - 'Default Flux Limits: None', + 'Default Flux Limits: 1000', '' ])) @@ -330,3 +337,13 @@ def test_metabolic_command_main(self): main(MockMetabolicCommand, args=[ '--model', self._model_dir]) self.assertTrue(MockMetabolicCommand.has_metabolic_model) + + def test_command_fail(self): + mock_command = MockCommand(self._model, None) + with self.assertRaises(SystemExit): + mock_command.fail('Run time error') + + def test_command_argument_error(self): + mock_command = MockCommand(self._model, None) + with self.assertRaises(CommandError): + mock_command.argument_error(msg='Argument error') diff --git a/psamm/tests/test_fastgapfill.py b/psamm/tests/test_fastgapfill.py new file mode 100644 index 00000000..a84e6890 --- /dev/null +++ b/psamm/tests/test_fastgapfill.py @@ -0,0 +1,133 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2016 Jon Lund Steffensen +# Copyright 2016 Chao liu + +import os +import unittest +import tempfile +import shutil + +from psamm.datasource.native import NativeModel +from psamm.metabolicmodel import MetabolicModel +from psamm.database import DictDatabase +from psamm.reaction import Compound +from psamm import fastgapfill +from psamm.lpsolver import generic +from psamm.datasource.reaction import parse_reaction + + +class TestCreateExtendedModel(unittest.TestCase): + def setUp(self): + self._model_dir = tempfile.mkdtemp() + with open(os.path.join(self._model_dir, 'model.yaml'), 'w') as f: + f.write('\n'.join([ + '---', + 'reactions:', + ' - id: rxn_1', + ' equation: A[e] <=> B[c]', + ' - id: rxn_2', + ' equation: A[e] => C[c]', + ' - id: rxn_3', + ' equation: A[e] => D[e]', + ' - id: rxn_4', + ' equation: C[c] => D[e]', + 'compounds:', + ' - id: A', + ' - id: B', + ' - id: C', + ' - id: D', + 'media:', + ' - compartment: e', + ' compounds:', + ' - id: A', + ' reaction: rxn_5', + ' - id: D', + ' reaction: rxn_6', + 'model:', + ' - reactions:', + ' - rxn_1', + ' - rxn_2', + ])) + self._model = NativeModel(self._model_dir) + + def tearDown(self): + shutil.rmtree(self._model_dir) + + def test_create_model_extended(self): + expected_reactions = set([ + 'rxn_1', + 'rxn_2', + 'rxn_3', + 'rxn_4', + 'rxn_5', + 'rxn_6', + ('rxntp', Compound('B', 'c')), + ('rxntp', Compound('C', 'c')), + ('rxnex', Compound('A', 'e')), + ('rxnex', Compound('B', 'c')), + ('rxnex', Compound('C', 'c')), + ('rxnex', Compound('D', 'e')) + ]) + + expected_weights = { + 'rxn_3': 5.6, + 'rxn_4': 1.0, + ('rxntp', Compound('B', 'c')): 3.0, + ('rxntp', Compound('C', 'c')): 3.0, + ('rxnex', Compound('A', 'e')): 2.0, + ('rxnex', Compound('B', 'c')): 2.0, + ('rxnex', Compound('C', 'c')): 2.0, + ('rxnex', Compound('D', 'e')): 2.0 + } + penalties = {'rxn_3': 5.6} + + model_extended, weights = fastgapfill.create_extended_model( + self._model, + db_weight=1.0, + ex_weight=2.0, + tp_weight=3.0, + penalties=penalties) + self.assertEqual(set(model_extended.reactions), expected_reactions) + self.assertEqual(weights, expected_weights) + + +class TestFastGapFill(unittest.TestCase): + def setUp(self): + self._database = DictDatabase() + self._database.set_reaction('rxn_1', parse_reaction('|A| <=>')) + self._database.set_reaction('rxn_2', parse_reaction('|A| <=> |B|')) + self._database.set_reaction('rxn_3', parse_reaction('|C| <=> |B|')) + self._database.set_reaction('rxn_4', parse_reaction('|C| <=>')) + self._mm = MetabolicModel.load_model( + self._database, self._database.reactions) + + try: + self._solver = generic.Solver() + except generic.RequirementsError: + self.skipTest('Unable to find an LP solver for tests') + + def test_fastgapfill(self): + core = {'rxn_2', 'rxn_3'} + induced = fastgapfill.fastgapfill( + self._mm, + core, + epsilon=0.001, + solver=self._solver) + + self.assertEqual( + set(induced), + {'rxn_1', 'rxn_2', 'rxn_3', 'rxn_4'}) diff --git a/psamm/tests/test_formula.py b/psamm/tests/test_formula.py index cd7ec197..502c1cf2 100755 --- a/psamm/tests/test_formula.py +++ b/psamm/tests/test_formula.py @@ -59,6 +59,14 @@ def test_atom_symbol_wide(self): a = Atom('Zn') self.assertEqual(a.symbol, 'Zn') + def test_atom_symbol_non_standard(self): + a = Atom('X') + self.assertEqual(a.symbol, 'X') + + def test_atom_singleton_property(self): + a = Atom.C + self.assertEqual(a.symbol, 'C') + def test_atom_to_string(self): a = Atom('C') self.assertEqual(str(a), 'C') @@ -69,9 +77,8 @@ def test_atom_repr(self): def test_atom_equals(self): a1 = Atom('H') - a2 = Atom('Zn') - self.assertEqual(a1, a1) - self.assertNotEqual(a1, a2) + self.assertEqual(a1, Atom.H) + self.assertNotEqual(a1, Atom.Zn) def test_atom_ordered(self): a1 = Atom('H') @@ -83,70 +90,70 @@ def test_atom_ordered(self): class TestFormula(unittest.TestCase): def test_formula_merge_same_formulas_with_same_atoms(self): - f = Formula({Atom('H'): 2, Atom('O'): 1}) | Formula({Atom('N'): 1, Atom('O'): 2}) - self.assertEqual(f, Formula({Atom('H'): 2, Atom('N'): 1, Atom('O'): 3})) + f = Formula({Atom.H: 2, Atom.O: 1}) | Formula({Atom.N: 1, Atom.O: 2}) + self.assertEqual(f, Formula({Atom.H: 2, Atom.N: 1, Atom.O: 3})) def test_formula_merge_formulas_that_cancel_out(self): - f = Formula({Atom('H'): 3}) | Formula({Atom('H'): -3}) + f = Formula({Atom.H: 3}) | Formula({Atom.H: -3}) self.assertEqual(f, Formula()) def test_formula_multiply_number(self): - f = Formula({Atom('H'): 2, Atom('O'): 1}) * 4 - self.assertEqual(f, Formula({Atom('H'): 8, Atom('O'): 4})) + f = Formula({Atom.H: 2, Atom.O: 1}) * 4 + self.assertEqual(f, Formula({Atom.H: 8, Atom.O: 4})) def test_formula_multiply_one(self): - f = Formula({Atom('H'): 2, Atom('O'): 1}) * 1 + f = Formula({Atom.H: 2, Atom.O: 1}) * 1 self.assertEqual(f, f) def test_formula_multiply_zero(self): - f = Formula({Atom('H'): 2, Atom('O'): 1}) * 0 + f = Formula({Atom.H: 2, Atom.O: 1}) * 0 self.assertEqual(f, Formula()) def test_formula_right_multiply_number(self): - f = 2 * Formula({Atom('H'): 2, Atom('O'): 1}) - self.assertEqual(f, Formula({Atom('H'): 4, Atom('O'): 2})) + f = 2 * Formula({Atom.H: 2, Atom.O: 1}) + self.assertEqual(f, Formula({Atom.H: 4, Atom.O: 2})) def test_formula_repeat(self): - f = Formula({Atom('H'): 2, Atom('O'): 1}).repeat(4) - self.assertEqual(f, Formula({ Formula({Atom('H'): 2, Atom('O'): 1}): 4 })) + f = Formula({Atom.H: 2, Atom.O: 1}).repeat(4) + self.assertEqual(f, Formula({Formula({Atom.H: 2, Atom.O: 1}): 4})) def test_formula_equals_other_formula(self): - f = Formula({Atom('H'): 2, Atom('O'): 1}) - self.assertEqual(f, Formula({Atom('O'): 1, Atom('H'): 2})) + f = Formula({Atom.H: 2, Atom.O: 1}) + self.assertEqual(f, Formula({Atom.O: 1, Atom.H: 2})) def test_formula_not_equals_other_with_distinct_elements(self): - f = Formula({Atom('Au'): 1}) - self.assertNotEqual(f, Formula({Atom('Ag'): 1})) + f = Formula({Atom.Au: 1}) + self.assertNotEqual(f, Formula({Atom.Ag: 1})) def test_formula_not_equals_other_with_different_number(self): - f = Formula({Atom('Au'): 1}) - self.assertNotEqual(f, Formula({Atom('Au'): 2})) + f = Formula({Atom.Au: 1}) + self.assertNotEqual(f, Formula({Atom.Au: 2})) def test_formula_items(self): - f = Formula({Atom('H'): 12, Atom('C'): 6, Atom('O'): 6}) + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) self.assertEqual(dict(f.items()), { - Atom('C'): 6, - Atom('H'): 12, - Atom('O'): 6 + Atom.C: 6, + Atom.H: 12, + Atom.O: 6 }) def test_formula_contains(self): - f = Formula({Atom('H'): 12, Atom('C'): 6, Atom('O'): 6}) - self.assertIn(Atom('C'), f) - self.assertNotIn(Atom('Ag'), f) + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) + self.assertIn(Atom.C, f) + self.assertNotIn(Atom.Ag, f) def test_formula_to_string(self): - f = Formula({Atom('H'): 12, Atom('C'): 6, Atom('O'): 6}) + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) self.assertEqual(str(f), 'C6H12O6') def test_formula_to_string_with_group(self): f = Formula({ - Atom('C'): 1, - Atom('H'): 3, - Formula({Atom('C'): 1, Atom('H'): 2}): 14, + Atom.C: 1, + Atom.H: 3, + Formula({Atom.C: 1, Atom.H: 2}): 14, Formula({ - Atom('C'): 1, Atom('O'): 1, - Formula({Atom('H'): 1, Atom('O'): 1}): 1 + Atom.C: 1, Atom.O: 1, + Formula({Atom.H: 1, Atom.O: 1}): 1 }): 1 }) # Ideally: self.assertEqual(str(f), 'CH3(CH2)14COOH') @@ -155,18 +162,18 @@ def test_formula_to_string_with_group(self): def test_formula_flattened(self): f = Formula({ - Atom('C'): 1, - Atom('H'): 3, - Formula({Atom('C'): 1, Atom('H'): 2}): 14, + Atom.C: 1, + Atom.H: 3, + Formula({Atom.C: 1, Atom.H: 2}): 14, Formula({ - Atom('C'): 1, Atom('O'): 1, - Formula({Atom('H'): 1, Atom('O'): 1}): 1 + Atom.C: 1, Atom.O: 1, + Formula({Atom.H: 1, Atom.O: 1}): 1 }): 1 }) self.assertEqual(f.flattened(), Formula({ - Atom('C'): 16, - Atom('H'): 32, - Atom('O'): 2 + Atom.C: 16, + Atom.H: 32, + Atom.O: 2 })) def test_formula_substitute_non_positive(self): @@ -189,75 +196,83 @@ def test_formula_is_variable(self): def test_formula_balance_missing_on_one_side(self): f1, f2 = Formula.balance(Formula.parse('H2O'), Formula.parse('OH')) self.assertEqual(f1, Formula()) - self.assertEqual(f2, Formula({Atom('H'): 1})) + self.assertEqual(f2, Formula({Atom.H: 1})) def test_formula_balance_missing_on_both_sides(self): - f1, f2 = Formula.balance(Formula.parse('C3H6OH'), Formula.parse('CH6O2')) - self.assertEqual(f1, Formula({Atom('O'): 1})) - self.assertEqual(f2, Formula({Atom('C'): 2, Atom('H'): 1})) + f1, f2 = Formula.balance( + Formula.parse('C3H6OH'), Formula.parse('CH6O2')) + self.assertEqual(f1, Formula({Atom.O: 1})) + self.assertEqual(f2, Formula({Atom.C: 2, Atom.H: 1})) def test_formula_balance_subgroups_cancel_out(self): - f1, f2 = Formula.balance(Formula.parse('H2(CH2)n'), Formula.parse('CH3O(CH2)n')) - self.assertEqual(f1, Formula({Atom('C'): 1, Atom('H'): 1, Atom('O'): 1})) + f1, f2 = Formula.balance( + Formula.parse('H2(CH2)n'), Formula.parse('CH3O(CH2)n')) + self.assertEqual(f1, Formula({Atom.C: 1, Atom.H: 1, Atom.O: 1})) self.assertEqual(f2, Formula()) class TestFormulaParser(unittest.TestCase): def test_formula_parse_with_final_digit(self): f = Formula.parse('H2O2') - self.assertEqual(f, Formula({Atom('H'): 2, Atom('O'): 2})) + self.assertEqual(f, Formula({Atom.H: 2, Atom.O: 2})) def test_formula_parse_with_implicit_final_digit(self): f = Formula.parse('H2O') - self.assertEqual(f, Formula({Atom('H'): 2, Atom('O'): 1})) + self.assertEqual(f, Formula({Atom.H: 2, Atom.O: 1})) def test_formula_parse_with_implicit_digit(self): f = Formula.parse('C2H5NO2') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 5, Atom('N'): 1, Atom('O'): 2})) + self.assertEqual(f, Formula( + {Atom.C: 2, Atom.H: 5, Atom.N: 1, Atom.O: 2})) def test_formula_parse_with_wide_element(self): f = Formula.parse('ZnO') - self.assertEqual(f, Formula({Atom('Zn'): 1, Atom('O'): 1})) + self.assertEqual(f, Formula({Atom.Zn: 1, Atom.O: 1})) def test_formula_parse_with_wide_count(self): f = Formula.parse('C6H10O2') - self.assertEqual(f, Formula({Atom('C'): 6, Atom('H'): 10, Atom('O'): 2})) + self.assertEqual(f, Formula({Atom.C: 6, Atom.H: 10, Atom.O: 2})) def test_formula_parse_with_implicitly_counted_subgroup(self): f = Formula.parse('C2H6O2(CH)') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 6, Atom('O'): 2, - Formula({Atom('C'): 1, Atom('H'): 1}): 1})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.H: 6, Atom.O: 2, + Formula({Atom.C: 1, Atom.H: 1}): 1})) def test_formula_parse_with_counted_subgroup(self): f = Formula.parse('C2H6O2(CH)2') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 6, Atom('O'): 2, - Formula({Atom('C'): 1, Atom('H'): 1}): 2})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.H: 6, Atom.O: 2, + Formula({Atom.C: 1, Atom.H: 1}): 2})) def test_formula_parse_with_two_identical_counted_subgroups(self): f = Formula.parse('C2H6O2(CH)2(CH)2') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 6, Atom('O'): 2, - Formula({Atom('C'): 1, Atom('H'): 1}): 4})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.H: 6, Atom.O: 2, + Formula({Atom.C: 1, Atom.H: 1}): 4})) def test_formula_parse_with_two_distinct_counted_subgroups(self): f = Formula.parse('C2H6O2(CH)2(CH2)2') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 6, Atom('O'): 2, - Formula({Atom('C'): 1, Atom('H'): 1}): 2, - Formula({Atom('C'): 1, Atom('H'): 2}): 2})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.H: 6, Atom.O: 2, + Formula({Atom.C: 1, Atom.H: 1}): 2, + Formula({Atom.C: 1, Atom.H: 2}): 2})) def test_formula_parse_with_wide_counted_subgroup(self): f = Formula.parse('C2(CH)10NO2') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('N'): 1, Atom('O'): 2, - Formula({Atom('C'): 1, Atom('H'): 1}): 10})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.N: 1, Atom.O: 2, + Formula({Atom.C: 1, Atom.H: 1}): 10})) def test_formula_parse_with_radical(self): f = Formula.parse('C2H4NO2R') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 4, Atom('N'): 1, - Atom('O'): 2, Radical('R'): 1})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.H: 4, Atom.N: 1, Atom.O: 2, Radical('R'): 1})) def test_formula_parse_with_numbered_radical(self): f = Formula.parse('C2H4NO2(R1)') - self.assertEqual(f, Formula({Atom('C'): 2, Atom('H'): 4, Atom('N'): 1, - Atom('O'): 2, Radical('R1'): 1})) + self.assertEqual(f, Formula({ + Atom.C: 2, Atom.H: 4, Atom.N: 1, Atom.O: 2, Radical('R1'): 1})) if __name__ == '__main__': diff --git a/psamm/tests/test_randomsparse.py b/psamm/tests/test_randomsparse.py new file mode 100644 index 00000000..3ea27675 --- /dev/null +++ b/psamm/tests/test_randomsparse.py @@ -0,0 +1,219 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2015 Keith Dufault-Thompson +# Copyright 2016 Chao Liu + +import os +import unittest +import tempfile +import shutil + +from psamm.datasource.native import NativeModel +from psamm.metabolicmodel import MetabolicModel +from psamm.database import DictDatabase +from psamm.datasource.reaction import parse_reaction +from psamm.expression import boolean +from psamm.lpsolver import generic +from psamm import fluxanalysis, randomsparse + + +class TestGetGeneAssociation(unittest.TestCase): + def setUp(self): + self._model_dir = tempfile.mkdtemp() + with open(os.path.join(self._model_dir, 'model.yaml'), 'w') as f: + f.write('\n'.join([ + '---', + 'reactions:', + ' - id: rxn_1', + ' equation: A[e] <=> B[c]', + ' genes: gene_1 and gene_2', + ' - id: rxn_2', + ' equation: A[e] => C[c]', + ' genes: gene_3', + ' - id: rxn_3', + ' equation: A[e] => D[e]', + ' - id: rxn_4', + ' equation: C[c] => D[e]', + ' genes: [gene_4, gene_5, gene_6]', + 'compounds:', + ' - id: A', + ' - id: B', + ' - id: C', + ' - id: D', + 'media:', + ' - compartment: e', + ' compounds:', + ' - id: A', + ' reaction: rxn_5', + ' - id: D', + ' reaction: rxn_6', + 'model:', + ' - reactions:', + ' - rxn_3', + ' - rxn_4', + ])) + self._model = NativeModel(self._model_dir) + + def tearDown(self): + shutil.rmtree(self._model_dir) + + def test_get_gene_association(self): + expected_association = { + 'rxn_1': boolean.Expression('gene_1 and gene_2'), + 'rxn_2': boolean.Expression('gene_3'), + 'rxn_4': boolean.Expression('gene_4 and gene_6 and gene_5') + } + + self.assertEqual( + dict(randomsparse.get_gene_associations(self._model)), + expected_association) + + +class TestGetExchangeReactions(unittest.TestCase): + def setUp(self): + self._database = DictDatabase() + self._database.set_reaction('rxn_1', parse_reaction('|A| <=>')) + self._database.set_reaction('rxn_2', parse_reaction('|A| <=> |B|')) + self._database.set_reaction('rxn_3', parse_reaction('|C| <=> |B|')) + self._database.set_reaction('rxn_4', parse_reaction('|C| <=>')) + self._mm = MetabolicModel.load_model( + self._database, self._database.reactions) + + def test_get_exchange_reactions(self): + expected_reactions = {'rxn_1', 'rxn_4'} + self.assertEqual( + set(randomsparse.get_exchange_reactions(self._mm)), + expected_reactions) + + +class TestReactionDeletionStrategy(unittest.TestCase): + def setUp(self): + self._database = DictDatabase() + self._database.set_reaction('rxn_1', parse_reaction('|A| <=>')) + self._database.set_reaction('rxn_2', parse_reaction('|A| <=> |B|')) + self._database.set_reaction('rxn_3', parse_reaction('|C| <=> |B|')) + self._database.set_reaction('rxn_4', parse_reaction('|C| <=>')) + self._mm = MetabolicModel.load_model( + self._database, self._database.reactions) + + self._strategy = randomsparse.ReactionDeletionStrategy(self._mm) + + def test_method_get_all(self): + expected_total = {'rxn_1', 'rxn_2', 'rxn_3', 'rxn_4'} + self.assertEqual(set(self._strategy.entities), expected_total) + + def test_method_tests(self): + expected_reactions = { + 'rxn_1': {'rxn_1'}, + 'rxn_2': {'rxn_2'}, + 'rxn_3': {'rxn_3'}, + 'rxn_4': {'rxn_4'} + } + self.assertEqual(dict(self._strategy.iter_tests()), expected_reactions) + + +class TestGeneDeletionStrategy(unittest.TestCase): + def setUp(self): + self._database = DictDatabase() + self._database.set_reaction('rxn_1', parse_reaction('|A| <=>')) + self._database.set_reaction('rxn_2', parse_reaction('|A| <=> |B|')) + self._database.set_reaction('rxn_3', parse_reaction('|C| <=> |B|')) + self._database.set_reaction('rxn_4', parse_reaction('|C| <=>')) + self._mm = MetabolicModel.load_model( + self._database, self._database.reactions) + self._assoc = {'rxn_2': boolean.Expression('gene_1 or gene_2')} + + self._strategy = randomsparse.GeneDeletionStrategy( + self._mm, self._assoc) + + def test_method_get_all(self): + expected_total = {'gene_1', 'gene_2'} + self.assertEqual(set(self._strategy.entities), expected_total) + + def test_method_tests_and_delete(self): + expected_genes = {'gene_1', 'gene_2'} + expected_reaction_set = {'rxn_2'} + test_dict = {} + + for i, (gene, deleted_reac) in enumerate(self._strategy.iter_tests()): + self._strategy.delete(gene, deleted_reac) + if i == 0: + self.assertEqual(deleted_reac, set()) + else: + self.assertEqual(deleted_reac, {'rxn_2'}) + test_dict[gene] = deleted_reac + + self.assertTrue(all(x in test_dict for x in expected_genes)) + self.assertTrue(expected_reaction_set in test_dict.values()) + + +class TestRandomSparse(unittest.TestCase): + def setUp(self): + self._database = DictDatabase() + self._database.set_reaction('rxn_1', parse_reaction('A[e] => B[e]')) + self._database.set_reaction('rxn_2', parse_reaction('B[e] => C[e]')) + self._database.set_reaction('rxn_3', parse_reaction('B[e] => D[e]')) + self._database.set_reaction('rxn_4', parse_reaction('C[e] => E[e]')) + self._database.set_reaction('rxn_5', parse_reaction('D[e] => E[e]')) + self._database.set_reaction('rxn_6', parse_reaction('E[e] =>')) + self._database.set_reaction('ex_A', parse_reaction('A[e] <=>')) + + self._mm = MetabolicModel.load_model( + self._database, self._database.reactions) + self._assoc = { + 'rxn_1': boolean.Expression('gene_1'), + 'rxn_2': boolean.Expression('gene_2 or gene_3'), + 'rxn_5': boolean.Expression('gene_3 and gene_4') + } + self._obj_reaction = 'rxn_6' + + try: + self._solver = generic.Solver() + except generic.RequirementsError: + self.skipTest('Unable to find an LP solver for tests') + + self._prob = fluxanalysis.FluxBalanceProblem(self._mm, self._solver) + + def test_random_sparse_reaction_strategy(self): + expected = [ + ({'rxn_1', 'rxn_6', 'ex_A', 'rxn_2', 'rxn_4'}, {'rxn_3', 'rxn_5'}), + ({'rxn_1', 'rxn_6', 'ex_A', 'rxn_3', 'rxn_5'}, {'rxn_2', 'rxn_4'}) + ] + + strategy = randomsparse.ReactionDeletionStrategy(self._mm) + essential, deleted = randomsparse.random_sparse( + strategy, + self._prob, + self._obj_reaction, + flux_threshold=100) + + self.assertTrue((essential, deleted) in expected) + + def test_random_sparse_gene_strategy(self): + expected = [ + ({'gene_1', 'gene_2'}, {'gene_3', 'gene_4'}), + ({'gene_1', 'gene_3'}, {'gene_2', 'gene_4'}) + ] + strategy = randomsparse.GeneDeletionStrategy( + self._mm, self._assoc) + essential, deleted = randomsparse.random_sparse( + strategy, + self._prob, + self._obj_reaction, + flux_threshold=100) + + self.assertTrue((essential, deleted) in expected) diff --git a/psamm/tests/test_yaml.py b/psamm/tests/test_yaml.py index 757e199d..9330d678 100644 --- a/psamm/tests/test_yaml.py +++ b/psamm/tests/test_yaml.py @@ -369,5 +369,22 @@ def test_parse_model_yaml_file(self): self.assertEqual(reactions, ['rxn_1', 'rxn_2', 'rxn_3', 'rxn_4']) +class TestCheckId(unittest.TestCase): + def test_check_id_none(self): + with self.assertRaises(native.ParseError): + native._check_id(None, 'Compound') + + def test_check_id_not_string(self): + with self.assertRaises(native.ParseError): + native._check_id(False, 'Compound') + + def test_check_id_empty_string(self): + with self.assertRaises(native.ParseError): + native._check_id('', 'Reaction') + + def test_check_id_success(self): + native._check_id(u'\u222b', 'Compound') + + if __name__ == '__main__': unittest.main() diff --git a/setup.py b/setup.py index 938b4f3d..34e86e93 100755 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='psamm', - version='0.21', + version='0.22', description='PSAMM metabolic modeling tools', maintainer='Jon Lund Steffensen', maintainer_email='jon_steffensen@uri.edu',