From 7569a814f4a9bb20f5faa7590b4b2f4529dc2717 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Sun, 13 Oct 2024 17:12:34 +0200 Subject: [PATCH] feat: alternative path of DLKcat in/output --- .../gather_kcats/readDLKcatOutput.html | 148 +++++---- doc/src/geckomat/gather_kcats/runDLKcat.html | 93 +++--- .../gather_kcats/writeDLKcatInput.html | 298 +++++++++--------- src/geckomat/gather_kcats/readDLKcatOutput.m | 7 +- src/geckomat/gather_kcats/runDLKcat.m | 23 +- src/geckomat/gather_kcats/writeDLKcatInput.m | 7 +- 6 files changed, 302 insertions(+), 274 deletions(-) diff --git a/doc/src/geckomat/gather_kcats/readDLKcatOutput.html b/doc/src/geckomat/gather_kcats/readDLKcatOutput.html index 6afb8e597..cb45963fd 100644 --- a/doc/src/geckomat/gather_kcats/readDLKcatOutput.html +++ b/doc/src/geckomat/gather_kcats/readDLKcatOutput.html @@ -34,10 +34,9 @@

DESCRIPTION ^SOURCE CODE ^% 0007 % Input: 0008 % model an ecModel in GECKO 3 format (with ecModel.ec structure) -0009 % outFile name and path of the DLKcat output file. If nothing is -0010 % provided, an attempt will be made to read -0011 % data/DLKcat.tsv from the obj.params.path folder -0012 % specified in the modelAdapter. -0013 % modelAdapter a loaded model adapter (Optional, will otherwise use the -0014 % default model adapter). -0015 % -0016 % Output: -0017 % kcatList structure array with list of DLKcat derived kcat values, -0018 % with separate entries for each kcat value -0019 % source 'DLKcat' -0020 % rxns reaction identifiers -0021 % genes gene identifiers -0022 % substrate substrate names -0023 % kcat predicted kcat value in /sec -0024 % -0025 % Usage: -0026 % kcatList = readDLKcatOutput(model, outFile, modelAdapter) -0027 -0028 if nargin < 3 || isempty(modelAdapter) -0029 modelAdapter = ModelAdapterManager.getDefault(); -0030 if isempty(modelAdapter) -0031 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') -0032 end -0033 end -0034 params = modelAdapter.params; -0035 -0036 if nargin<2 || isempty(outFile) -0037 fID = fopen(fullfile(params.path,'data','DLKcat.tsv'),'r'); -0038 else -0039 fID = fopen(outFile); -0040 end -0041 DLKcatOutput = textscan(fID,'%s %s %s %s %s %s','Delimiter','\t','HeaderLines',1); -0042 fclose(fID); -0043 -0044 % Check that DLKcat output file and model match (not fool proof, but good enough) -0045 [rxns, genes, subs, kcats] = deal(DLKcatOutput{[1,2,3,6]}); -0046 -0047 % Check if it contains any kcat values -0048 if all(cellfun(@isempty,kcats)) || all(strcmpi(kcats,'NA')) -0049 error('DLKcat file does not contain any kcat values, please run runDLKcat() first.') -0050 end -0051 -0052 % Check that all substrates are in the model -0053 matchMets = ismember(subs,model.metNames); -0054 if ~all(matchMets) -0055 errorText = 'DLKcat was likely run with an input file that was generated from 
another ecModel, as the following substrates from DLKcat output cannot be found in model.metNames:'; -0056 dispEM(errorText,true,subs(~matchMets),false) -0057 end -0058 -0059 % Check that all reactions are in model.ec.rxns -0060 matchRxns = ismember(rxns,model.ec.rxns); -0061 if ~all(matchRxns) -0062 errorText = 'DLKcat was likely run with an input file that was generated from another ecModel, as the following reactions from DLKcat output cannot be found in model.metNames:'; -0063 dispEM(errorText,true,rxns(~matchRxns),false) -0064 end -0065 -0066 % Filter out entries with no numeric value -0067 noOutput = cellfun(@isempty,regexpi(kcats,'[0-9]')); -0068 kcats = str2double(kcats(~noOutput)); -0069 rxns(noOutput) = []; -0070 genes(noOutput) = []; -0071 subs(noOutput) = []; -0072 -0073 % Make kcatList structure -0074 kcatList.source = 'DLKcat'; -0075 kcatList.rxns = rxns; -0076 kcatList.genes = genes; -0077 kcatList.substrates = subs; -0078 kcatList.kcats = kcats; -0079 end +0009 % outFile name and path of the DLKcat output file. (Optional, +0010 % default is data/DLKcat.tsv from the obj.params.path +0011 % folder specified in the modelAdapter) +0012 % modelAdapter a loaded model adapter (Optional, will otherwise use the +0013 % default model adapter). 
+0014 % +0015 % Output: +0016 % kcatList structure array with list of DLKcat derived kcat values, +0017 % with separate entries for each kcat value +0018 % source 'DLKcat' +0019 % rxns reaction identifiers +0020 % genes gene identifiers +0021 % substrate substrate names +0022 % kcat predicted kcat value in /sec +0023 % +0024 % Usage: +0025 % kcatList = readDLKcatOutput(model, outFile, modelAdapter) +0026 +0027 if nargin < 3 || isempty(modelAdapter) +0028 modelAdapter = ModelAdapterManager.getDefault(); +0029 if isempty(modelAdapter) +0030 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') +0031 end +0032 end +0033 params = modelAdapter.params; +0034 +0035 if nargin<2 || isempty(outFile) +0036 fID = fopen(fullfile(params.path,'data','DLKcat.tsv'),'r'); +0037 else +0038 fID = fopen(outFile); +0039 end +0040 DLKcatOutput = textscan(fID,'%s %s %s %s %s %s','Delimiter','\t','HeaderLines',1); +0041 fclose(fID); +0042 +0043 % Check that DLKcat output file and model match (not fool proof, but good enough) +0044 [rxns, genes, subs, kcats] = deal(DLKcatOutput{[1,2,3,6]}); +0045 +0046 % Check if it contains any kcat values +0047 if all(cellfun(@isempty,kcats)) || all(strcmpi(kcats,'NA')) +0048 error('DLKcat file does not contain any kcat values, please run runDLKcat() first.') +0049 end +0050 +0051 % Check that all substrates are in the model +0052 matchMets = ismember(subs,model.metNames); +0053 if ~all(matchMets) +0054 errorText = 'DLKcat was likely run with an input file that was generated from another ecModel, as the following substrates from DLKcat output cannot be found in model.metNames:'; +0055 dispEM(errorText,true,subs(~matchMets),false) +0056 end +0057 +0058 % Check that all reactions are in model.ec.rxns +0059 matchRxns = ismember(rxns,model.ec.rxns); +0060 if ~all(matchRxns) +0061 errorText = 'DLKcat was likely run with an input file that was generated from another ecModel, as the following reactions from DLKcat 
output cannot be found in model.metNames:'; +0062 dispEM(errorText,true,rxns(~matchRxns),false) +0063 end +0064 +0065 % Filter out entries with no numeric value +0066 noOutput = cellfun(@isempty,regexpi(kcats,'[0-9]')); +0067 kcats = str2double(kcats(~noOutput)); +0068 rxns(noOutput) = []; +0069 genes(noOutput) = []; +0070 subs(noOutput) = []; +0071 +0072 % Make kcatList structure +0073 kcatList.source = 'DLKcat'; +0074 kcatList.rxns = rxns; +0075 kcatList.genes = genes; +0076 kcatList.substrates = subs; +0077 kcatList.kcats = kcats; +0078 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/gather_kcats/runDLKcat.html b/doc/src/geckomat/gather_kcats/runDLKcat.html index b11b22eef..313f2fbe8 100644 --- a/doc/src/geckomat/gather_kcats/runDLKcat.html +++ b/doc/src/geckomat/gather_kcats/runDLKcat.html @@ -24,7 +24,7 @@

PURPOSE ^runDLKcat

SYNOPSIS ^

-
function runDLKcat(modelAdapter)
+
function runDLKcat(modelAdapter,filePath)

DESCRIPTION ^

 runDLKcat
@@ -36,6 +36,8 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

-
0001 function runDLKcat(modelAdapter)
+
0001 function runDLKcat(modelAdapter,filePath)
 0002 % runDLKcat
 0003 %   Runs DLKcat to predict kcat values from a Docker image. Once DLKcat is succesfully
 0004 %   run, the DLKcatFile will be overwritten with the DLKcat
@@ -63,46 +65,57 @@ 

SOURCE CODE ^% Input 0009 % modelAdapter a loaded model adapter. (Optional, will otherwise use 0010 % the default model adapter) -0011 % -0012 % NOTE: 1. Requires Docker to be installed, and Docker Desktop running. Visit "https://www.docker.com" -0013 % 2. Runtime will depend on whether the image is to be downloaded or not. -0014 -0015 if nargin < 1 || isempty(modelAdapter) -0016 modelAdapter = ModelAdapterManager.getDefault(); -0017 if isempty(modelAdapter) -0018 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') -0019 end -0020 end -0021 -0022 params = modelAdapter.params; -0023 % Make sure path is full, not relative -0024 [~, params.path] = fileattrib(params.path); -0025 params.path=params.path.Name; -0026 -0027 %% Check and install requirements -0028 % On macOS, Docker might not be properly loaded if MATLAB is started via -0029 % launcher and not terminal. -0030 if ismac -0031 setenv('PATH', strcat('/usr/local/bin', ':', getenv("PATH"))); +0011 % filePath path to the DLKcat.tsv file. (Optional, will otherwise +0012 % assume data/DLKcat.tsv) +0013 % +0014 % NOTE: 1. Requires Docker to be installed, and Docker Desktop running. Visit "https://www.docker.com" +0015 % 2. Runtime will depend on whether the image is to be downloaded or not. 
+0016 +0017 if nargin < 1 || isempty(modelAdapter) +0018 modelAdapter = ModelAdapterManager.getDefault(); +0019 if isempty(modelAdapter) +0020 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') +0021 end +0022 end +0023 params = modelAdapter.params; +0024 % Make sure path is full, not relative +0025 [~, params.path] = fileattrib(params.path); +0026 params.path=params.path.Name; +0027 +0028 if nargin < 2 || isempty(filePath) +0029 filePath = fullfile(params.path,'data','DLKcat.tsv'); +0030 elseif strcmp(filePath(end),{'\','/'}) +0031 filePath = fullfile(filePath,'DLKcat.tsv'); 0032 end -0033 -0034 % Check if Docker is installed -0035 [checks.docker.status, checks.docker.out] = system('docker --version'); -0036 if checks.docker.status ~= 0 -0037 error('Cannot find Docker, make sure it is installed. If it is, it might be required to start Matlab from the command-line instead of the launcher in order for Docker to be detected and used.') -0038 end -0039 -0040 disp('Running DLKcat prediction, this may take many minutes, especially the first time.') -0041 status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv"']); -0042 -0043 if status == 0 && exist(fullfile(params.path,'data/DLKcatOutput.tsv')) -0044 delete(fullfile(params.path,'/data/DLKcat.tsv')); -0045 movefile(fullfile(params.path,'/data/DLKcatOutput.tsv'), fullfile(params.path,'/data/DLKcat.tsv')); -0046 disp('DKLcat prediction completed.'); -0047 else -0048 error('DLKcat encountered an error or it did not create any output file.') +0033 filePath = checkFileExistence(filePath,1); +0034 +0035 copyfile(filePath, fullfile(params.path,'data','tempDLKcat.tsv')); +0036 +0037 +0038 %% Check and install requirements +0039 % On macOS, Docker might not be properly loaded if MATLAB is started via +0040 % launcher and not terminal. 
+0041 if ismac +0042 setenv('PATH', strcat('/usr/local/bin', ':', getenv("PATH"))); +0043 end +0044 +0045 % Check if Docker is installed +0046 [checks.docker.status, checks.docker.out] = system('docker --version'); +0047 if checks.docker.status ~= 0 +0048 error('Cannot find Docker, make sure it is installed. If it is, it might be required to start Matlab from the command-line instead of the launcher in order for Docker to be detected and used.') 0049 end -0050

+0050 +0051 disp('Running DLKcat prediction, this may take many minutes, especially the first time.') +0052 status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/tempDLKcat.tsv /data/tempDLKcatOutput.tsv"']); +0053 delete(fullfile(params.path,'/data/tempDLKcat.tsv')); +0054 +0055 if status == 0 && exist(fullfile(params.path,'data/tempDLKcatOutput.tsv')) +0056 movefile(fullfile(params.path,'/data/tempDLKcatOutput.tsv'), filePath); +0057 disp('DKLcat prediction completed.'); +0058 else +0059 error('DLKcat encountered an error or it did not create any output file.') +0060 end +0061

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/gather_kcats/writeDLKcatInput.html b/doc/src/geckomat/gather_kcats/writeDLKcatInput.html index 030c07d7b..b8a1b4ae2 100644 --- a/doc/src/geckomat/gather_kcats/writeDLKcatInput.html +++ b/doc/src/geckomat/gather_kcats/writeDLKcatInput.html @@ -41,8 +41,9 @@

DESCRIPTION ^SOURCE CODE ^% default model adapter). 0014 % onlyWithSmiles logical whether to only include metabolites with SMILES 0015 % (optional, default true) -0016 % filename Filename (Optional). Normally this parameter should not be -0017 % supplied, but it is useful for test cases. -0018 % overwrite logical whether existing file should be overwritten. -0019 % (Optional, default false, to prevent overwriting file -0020 % that already contains DLKcat-predicted kcat values). -0021 % -0022 % Output: -0023 % writtenTable The table written, mainly to be used for testing purposes. -0024 % -0025 % Usage: -0026 % writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite) -0027 -0028 [geckoPath, ~] = findGECKOroot(); -0029 -0030 if nargin<2 || isempty(ecRxns) -0031 ecRxns = true(numel(model.ec.rxns),1); -0032 elseif ~logical(ecRxns) -0033 error('ecRxns should be provided as logical vector') -0034 elseif numel(ecRxns)~=numel(model.ec.rxns) -0035 error('Length of ecRxns is not the same as model.ec.rxns') -0036 end -0037 ecRxns = find(ecRxns); % Change to indices -0038 -0039 if nargin < 3 || isempty(modelAdapter) -0040 modelAdapter = ModelAdapterManager.getDefault(); -0041 if isempty(modelAdapter) -0042 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') -0043 end -0044 end -0045 params = modelAdapter.params; -0046 -0047 if nargin<4 || isempty(onlyWithSmiles) -0048 onlyWithSmiles=true; -0049 end -0050 -0051 if nargin<5 || isempty(filename) -0052 filename = fullfile(params.path,'data','DLKcat.tsv'); -0053 end -0054 -0055 if nargin<6 || isempty(overwrite) || ~overwrite % If is true -0056 if exist(filename,'file') -0057 error([filename ' already exists, either delete it first, or set the overwrite input argument as true']) -0058 end -0059 end -0060 -0061 if ~model.ec.geckoLight -0062 origRxns = model.ec.rxns; -0063 else -0064 origRxns = extractAfter(model.ec.rxns,4); -0065 end -0066 
origRxnsToInclude = origRxns(ecRxns); -0067 -0068 % Map back to original reactions, to extract substrates -0069 [sanityCheck,origRxnIdxs] = ismember(origRxnsToInclude,model.rxns); -0070 if ~all(sanityCheck) -0071 error('Not all reactions in model.ec.rxns are found in model.rxns') -0072 end -0073 -0074 % Ignore selected metabolites (metal ions, proteins etc.). First check by -0075 % name (case insensitive, without white spaces and special characters), -0076 % then also try to match with metSmiles (if available). -0077 metsNoSpecialChars = lower(regexprep(model.metNames,'[^0-9a-zA-Z]+','')); -0078 if exist(fullfile(params.path,'data','DLKcatIgnoreMets.tsv'),'file') -0079 fID = fopen(fullfile(params.path,'data','DLKcatIgnoreMets.tsv')); -0080 else -0081 fID = fopen(fullfile(geckoPath,'databases','DLKcatIgnoreMets.tsv')); -0082 end -0083 fileData = textscan(fID,'%s %s','delimiter','\t'); -0084 fclose(fID); -0085 [ignoreMets, ignoreSmiles] = deal(fileData{[1,2]}); -0086 ignoreMets = lower(regexprep(ignoreMets,'[^0-9a-zA-Z]+','')); -0087 ignoreSmiles(cellfun(@isempty,ignoreSmiles)) = []; -0088 -0089 ignoreMetsIdx = logical(ismember(metsNoSpecialChars,ignoreMets)); -0090 if isfield(model,'metSmiles') -0091 ignoreMetsIdx = ignoreMetsIdx | logical(ismember(model.metSmiles,ignoreSmiles)); -0092 end -0093 % Also leave out protein-usage pseudometabolites -0094 ignoreMetsIdx = ignoreMetsIdx | startsWith(model.mets,'prot_'); -0095 reducedS = model.S; -0096 reducedS(ignoreMetsIdx,:) = 0; -0097 -0098 % Ignore currency metabolites if they occur in pairs. First check by -0099 % name (case insensitive, without white spaces and special characters), -0100 % then also try to match with metSmiles (if available). 
-0101 if exist(fullfile(params.path,'data','DLKcatCurrencyMets.tsv'),'file') -0102 fID = fopen(fullfile(params.path,'data','DLKcatCurrencyMets.tsv')); -0103 else -0104 fID = fopen(fullfile(geckoPath,'databases','DLKcatCurrencyMets.tsv')); -0105 end -0106 fileData = textscan(fID,'%s %s','delimiter','\t'); -0107 fclose(fID); -0108 [currencyMets(:,1), currencyMets(:,2)] = deal(fileData{[1,2]}); -0109 currencyMets = lower(regexprep(currencyMets,'[^0-9a-zA-Z]+','')); -0110 -0111 for i=1:size(currencyMets,1) -0112 subs = strcmp(currencyMets(i,1),metsNoSpecialChars); -0113 prod = strcmp(currencyMets(i,2),metsNoSpecialChars); -0114 [~,subsRxns]=find(reducedS(subs,:)); -0115 [~,prodRxns]=find(reducedS(prod,:)); -0116 pairRxns = intersect(subsRxns,prodRxns); -0117 tempRedS=reducedS; -0118 tempRedS([find(subs);find(prod)],pairRxns) = 0; -0119 % Do not remove currency mets if no substrate remains -0120 rxnsWithRemainingSubstrates = any(tempRedS(:,pairRxns) < 0,1); -0121 reducedS([find(subs);find(prod)],intersect(pairRxns,pairRxns(rxnsWithRemainingSubstrates))) = 0; -0122 end -0123 -0124 %filter out the reactions we're not interested in - will solve the problem for both full and light -0125 clearedRedS = reducedS(:,origRxnIdxs); -0126 rxnsToClear = true(length(origRxnIdxs),1); -0127 rxnsToClear(ecRxns) = false; -0128 clearedRedS(:,rxnsToClear) = 0; -0129 -0130 % Enumerate all substrates for each reaction -0131 [substrates, reactions] = find(clearedRedS<0); %the reactions here are in model.ec.rxns space +0016 % filename path to the input file, including the filename and .tsv +0017 % extension (Optional, default is data/DLKcat.tsv from +0018 % the obj.params.path folder specified in the modelAdapter) +0019 % overwrite logical whether existing file should be overwritten. +0020 % (Optional, default false, to prevent overwriting file +0021 % that already contains DLKcat-predicted kcat values). 
+0022 % +0023 % Output: +0024 % writtenTable The table written, mainly to be used for testing purposes. +0025 % +0026 % Usage: +0027 % writtenTable = writeDLKcatInput(model, ecRxns, modelAdapter, onlyWithSmiles, filename, overwrite) +0028 +0029 [geckoPath, ~] = findGECKOroot(); +0030 +0031 if nargin<2 || isempty(ecRxns) +0032 ecRxns = true(numel(model.ec.rxns),1); +0033 elseif ~logical(ecRxns) +0034 error('ecRxns should be provided as logical vector') +0035 elseif numel(ecRxns)~=numel(model.ec.rxns) +0036 error('Length of ecRxns is not the same as model.ec.rxns') +0037 end +0038 ecRxns = find(ecRxns); % Change to indices +0039 +0040 if nargin < 3 || isempty(modelAdapter) +0041 modelAdapter = ModelAdapterManager.getDefault(); +0042 if isempty(modelAdapter) +0043 error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') +0044 end +0045 end +0046 params = modelAdapter.params; +0047 +0048 if nargin<4 || isempty(onlyWithSmiles) +0049 onlyWithSmiles=true; +0050 end +0051 +0052 if nargin<5 || isempty(filename) +0053 filename = fullfile(params.path,'data','DLKcat.tsv'); +0054 elseif ~endsWith(filename,'.tsv') +0055 error('If filename is provided, it should include the .tsv extension.') +0056 end +0057 +0058 if nargin<6 || isempty(overwrite) || ~overwrite % If is true +0059 if exist(filename,'file') +0060 error([filename ' already exists, either delete it first, or set the overwrite input argument as true']) +0061 end +0062 end +0063 +0064 if ~model.ec.geckoLight +0065 origRxns = model.ec.rxns; +0066 else +0067 origRxns = extractAfter(model.ec.rxns,4); +0068 end +0069 origRxnsToInclude = origRxns(ecRxns); +0070 +0071 % Map back to original reactions, to extract substrates +0072 [sanityCheck,origRxnIdxs] = ismember(origRxnsToInclude,model.rxns); +0073 if ~all(sanityCheck) +0074 error('Not all reactions in model.ec.rxns are found in model.rxns') +0075 end +0076 +0077 % Ignore selected metabolites (metal ions, proteins etc.). 
First check by +0078 % name (case insensitive, without white spaces and special characters), +0079 % then also try to match with metSmiles (if available). +0080 metsNoSpecialChars = lower(regexprep(model.metNames,'[^0-9a-zA-Z]+','')); +0081 if exist(fullfile(params.path,'data','DLKcatIgnoreMets.tsv'),'file') +0082 fID = fopen(fullfile(params.path,'data','DLKcatIgnoreMets.tsv')); +0083 else +0084 fID = fopen(fullfile(geckoPath,'databases','DLKcatIgnoreMets.tsv')); +0085 end +0086 fileData = textscan(fID,'%s %s','delimiter','\t'); +0087 fclose(fID); +0088 [ignoreMets, ignoreSmiles] = deal(fileData{[1,2]}); +0089 ignoreMets = lower(regexprep(ignoreMets,'[^0-9a-zA-Z]+','')); +0090 ignoreSmiles(cellfun(@isempty,ignoreSmiles)) = []; +0091 +0092 ignoreMetsIdx = logical(ismember(metsNoSpecialChars,ignoreMets)); +0093 if isfield(model,'metSmiles') +0094 ignoreMetsIdx = ignoreMetsIdx | logical(ismember(model.metSmiles,ignoreSmiles)); +0095 end +0096 % Also leave out protein-usage pseudometabolites +0097 ignoreMetsIdx = ignoreMetsIdx | startsWith(model.mets,'prot_'); +0098 reducedS = model.S; +0099 reducedS(ignoreMetsIdx,:) = 0; +0100 +0101 % Ignore currency metabolites if they occur in pairs. First check by +0102 % name (case insensitive, without white spaces and special characters), +0103 % then also try to match with metSmiles (if available). 
+0104 if exist(fullfile(params.path,'data','DLKcatCurrencyMets.tsv'),'file') +0105 fID = fopen(fullfile(params.path,'data','DLKcatCurrencyMets.tsv')); +0106 else +0107 fID = fopen(fullfile(geckoPath,'databases','DLKcatCurrencyMets.tsv')); +0108 end +0109 fileData = textscan(fID,'%s %s','delimiter','\t'); +0110 fclose(fID); +0111 [currencyMets(:,1), currencyMets(:,2)] = deal(fileData{[1,2]}); +0112 currencyMets = lower(regexprep(currencyMets,'[^0-9a-zA-Z]+','')); +0113 +0114 for i=1:size(currencyMets,1) +0115 subs = strcmp(currencyMets(i,1),metsNoSpecialChars); +0116 prod = strcmp(currencyMets(i,2),metsNoSpecialChars); +0117 [~,subsRxns]=find(reducedS(subs,:)); +0118 [~,prodRxns]=find(reducedS(prod,:)); +0119 pairRxns = intersect(subsRxns,prodRxns); +0120 tempRedS=reducedS; +0121 tempRedS([find(subs);find(prod)],pairRxns) = 0; +0122 % Do not remove currency mets if no substrate remains +0123 rxnsWithRemainingSubstrates = any(tempRedS(:,pairRxns) < 0,1); +0124 reducedS([find(subs);find(prod)],intersect(pairRxns,pairRxns(rxnsWithRemainingSubstrates))) = 0; +0125 end +0126 +0127 %filter out the reactions we're not interested in - will solve the problem for both full and light +0128 clearedRedS = reducedS(:,origRxnIdxs); +0129 rxnsToClear = true(length(origRxnIdxs),1); +0130 rxnsToClear(ecRxns) = false; +0131 clearedRedS(:,rxnsToClear) = 0; 0132 -0133 % Enumerate all proteins for each reaction -0134 [proteins, ecRxns] = find(transpose(model.ec.rxnEnzMat(reactions,:))); +0133 % Enumerate all substrates for each reaction +0134 [substrates, reactions] = find(clearedRedS<0); %the reactions here are in model.ec.rxns space 0135 -0136 % Prepare output -0137 out(1,:) = model.ec.rxns(reactions(ecRxns)); -0138 out(2,:) = model.ec.genes(proteins); -0139 out(3,:) = model.metNames(substrates(ecRxns)); -0140 if isfield(model,'metSmiles') -0141 out(4,:) = model.metSmiles(substrates(ecRxns)); -0142 else -0143 out(4,:) = cell(numel(substrates(ecRxns)),1); -0144 end -0145 -0146 out(5,:) 
= model.ec.sequence(proteins); -0147 if onlyWithSmiles -0148 out(:,cellfun(@isempty,out(4,:))) = []; -0149 else -0150 out(4,cellfun(@isempty,out(4,:))) = {'None'}; -0151 end -0152 out(6,:) = cell(numel(out(1,:)),1); -0153 out(6,:) = {'NA'}; -0154 -0155 % Write file -0156 fID = fopen(filename,'w'); -0157 fprintf(fID,'%s\t%s\t%s\t%s\t%s\t%s\n',out{:}); -0158 fclose(fID); -0159 fprintf('Model-specific DLKcat input stored at %s\n',filename); -0160 -0161 writtenTable = out; -0162 end

+0136 % Enumerate all proteins for each reaction +0137 [proteins, ecRxns] = find(transpose(model.ec.rxnEnzMat(reactions,:))); +0138 +0139 % Prepare output +0140 out(1,:) = model.ec.rxns(reactions(ecRxns)); +0141 out(2,:) = model.ec.genes(proteins); +0142 out(3,:) = model.metNames(substrates(ecRxns)); +0143 if isfield(model,'metSmiles') +0144 out(4,:) = model.metSmiles(substrates(ecRxns)); +0145 else +0146 out(4,:) = cell(numel(substrates(ecRxns)),1); +0147 end +0148 +0149 out(5,:) = model.ec.sequence(proteins); +0150 if onlyWithSmiles +0151 out(:,cellfun(@isempty,out(4,:))) = []; +0152 else +0153 out(4,cellfun(@isempty,out(4,:))) = {'None'}; +0154 end +0155 out(6,:) = cell(numel(out(1,:)),1); +0156 out(6,:) = {'NA'}; +0157 +0158 % Write file +0159 fID = fopen(filename,'w'); +0160 fprintf(fID,'%s\t%s\t%s\t%s\t%s\t%s\n',out{:}); +0161 fclose(fID); +0162 fprintf('Model-specific DLKcat input stored at %s\n',filename); +0163 +0164 writtenTable = out; +0165 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/src/geckomat/gather_kcats/readDLKcatOutput.m b/src/geckomat/gather_kcats/readDLKcatOutput.m index 1effd5fe8..290c7d012 100644 --- a/src/geckomat/gather_kcats/readDLKcatOutput.m +++ b/src/geckomat/gather_kcats/readDLKcatOutput.m @@ -6,10 +6,9 @@ % % Input: % model an ecModel in GECKO 3 format (with ecModel.ec structure) -% outFile name and path of the DLKcat output file. If nothing is -% provided, an attempt will be made to read -% data/DLKcat.tsv from the obj.params.path folder -% specified in the modelAdapter. +% outFile name and path of the DLKcat output file. (Optional, +% default is data/DLKcat.tsv from the obj.params.path +% folder specified in the modelAdapter) % modelAdapter a loaded model adapter (Optional, will otherwise use the % default model adapter). % diff --git a/src/geckomat/gather_kcats/runDLKcat.m b/src/geckomat/gather_kcats/runDLKcat.m index 4701cc268..3c9807acb 100644 --- a/src/geckomat/gather_kcats/runDLKcat.m +++ b/src/geckomat/gather_kcats/runDLKcat.m @@ -1,4 +1,4 @@ -function runDLKcat(modelAdapter) +function runDLKcat(modelAdapter,filePath) % runDLKcat % Runs DLKcat to predict kcat values from a Docker image. Once DLKcat is succesfully % run, the DLKcatFile will be overwritten with the DLKcat @@ -8,6 +8,8 @@ function runDLKcat(modelAdapter) % Input % modelAdapter a loaded model adapter. (Optional, will otherwise use % the default model adapter) +% filePath path to the DLKcat.tsv file. (Optional, will otherwise +% assume data/DLKcat.tsv) % % NOTE: 1. Requires Docker to be installed, and Docker Desktop running. Visit "https://www.docker.com" % 2. Runtime will depend on whether the image is to be downloaded or not. 
@@ -18,12 +20,21 @@ function runDLKcat(modelAdapter) error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.') end end - params = modelAdapter.params; % Make sure path is full, not relative [~, params.path] = fileattrib(params.path); params.path=params.path.Name; +if nargin < 2 || isempty(filePath) + filePath = fullfile(params.path,'data','DLKcat.tsv'); +elseif any(strcmp(filePath(end),{'\','/'})) % only a folder was given: append the default file name + filePath = fullfile(filePath,'DLKcat.tsv'); +end +filePath = checkFileExistence(filePath,1); + +copyfile(filePath, fullfile(params.path,'data','tempDLKcat.tsv')); + + %% Check and install requirements % On macOS, Docker might not be properly loaded if MATLAB is started via % launcher and not terminal. @@ -38,11 +49,11 @@ function runDLKcat(modelAdapter) end disp('Running DLKcat prediction, this may take many minutes, especially the first time.') -status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/DLKcat.tsv /data/DLKcatOutput.tsv"']); +status = system(['docker run --rm -v "' fullfile(params.path,'/data') '":/data ghcr.io/sysbiochalmers/dlkcat-gecko:0.1 /bin/bash -c "python DLKcat.py /data/tempDLKcat.tsv /data/tempDLKcatOutput.tsv"']); +delete(fullfile(params.path,'/data/tempDLKcat.tsv')); -if status == 0 && exist(fullfile(params.path,'data/DLKcatOutput.tsv')) - delete(fullfile(params.path,'/data/DLKcat.tsv')); - movefile(fullfile(params.path,'/data/DLKcatOutput.tsv'), fullfile(params.path,'/data/DLKcat.tsv')); +if status == 0 && exist(fullfile(params.path,'data/tempDLKcatOutput.tsv'),'file') + movefile(fullfile(params.path,'/data/tempDLKcatOutput.tsv'), filePath); disp('DKLcat prediction completed.'); else error('DLKcat encountered an error or it did not create any output file.') diff --git a/src/geckomat/gather_kcats/writeDLKcatInput.m b/src/geckomat/gather_kcats/writeDLKcatInput.m index fd802746a..090b02e77 100644 --- 
a/src/geckomat/gather_kcats/writeDLKcatInput.m +++ b/src/geckomat/gather_kcats/writeDLKcatInput.m @@ -13,8 +13,9 @@ % default model adapter). % onlyWithSmiles logical whether to only include metabolites with SMILES % (optional, default true) -% filename Filename (Optional). Normally this parameter should not be -% supplied, but it is useful for test cases. +% filename path to the input file, including the filename and .tsv +% extension (Optional, default is data/DLKcat.tsv from +% the obj.params.path folder specified in the modelAdapter) % overwrite logical whether existing file should be overwritten. % (Optional, default false, to prevent overwriting file % that already contains DLKcat-predicted kcat values). @@ -50,6 +51,8 @@ if nargin<5 || isempty(filename) filename = fullfile(params.path,'data','DLKcat.tsv'); +elseif ~endsWith(filename,'.tsv') + error('If filename is provided, it should include the .tsv extension.') end if nargin<6 || isempty(overwrite) || ~overwrite % If is true