Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refining functions #2382

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/dataIntegration/chemoInformatics/molecularWeight/computeMW.m
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,26 @@
% molecular weight of elements in a structure
elementMwStruct = struct('H', 1, 'C', 12, 'N', 14, 'O', 16, 'Na', 23, 'Mg', 24, 'P', 31, ...
'S', 32, 'Cl', 35, 'K', 39, 'Ca', 40, 'Mn', 55, 'Fe', 56, 'Ni', 58, 'Co', 59, ...
'Cu', 63, 'Zn', 65, 'As', 75, 'Se', 80, 'Ag', 107, 'Cd', 114, 'W', 184, 'Hg', 202);
'Cu', 63, 'Zn', 65, 'As', 75, 'Se', 80, 'Ag', 107, 'Cd', 114, 'W', 184, 'Hg', 202,...
'F', 19, 'I', 127, 'Ba', 137, 'Y', 89 );
elementNames = fieldnames(elementMwStruct); % elements' names
MW = zeros(size(metIDs));

for n = 1:length(metIDs)
i = metIDs(n);
formula = model.metFormulas(i);

% Skip calculations for metabolites containing 'FULLRCO'
if contains(formula, 'FULLR')|| contains(formula,'R')
MW(n) = NaN; % Assign NaN to indicate R-group
identifire = 'R-group';
continue;
elseif contains(formula, 'X')
MW(n) = NaN; % Assign NaN to indicate 'X'
identifire = 'X';
continue;
end

[compounds, tok] = regexp(formula, '([A-Z][a-z]*)(\d*)', 'match', 'tokens');
tok = tok{1, 1};
for j = 1:length(tok) % go through each token.
Expand Down
92 changes: 92 additions & 0 deletions src/visualization/entropicFBA/calculateGeneWeight.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
function geneWeight = calculateGeneWeight(model,Transcriptomic, Threshold)
% Inputs:
% model - A COBRA model with mandatory fields: grRules and SConsistentRxnBool.
% Transcriptomic - A table with entrezID and geneExpression values.
% Threshold - Threshold for transcriptomic data.


% Note: geneWeight is a value that can be used in entropicFBA to assign a weight
% that corresponds to the gene expression value to internal reactions. If you
% want to use this value for this purpose, use the following formula:
% cr = cf = -log(geneWeight + 1e-8) + 1 - ci/g
% Default values: g = 2 , ci = 0

% Author : Samira Ranjbar 2024
% (The algoithm is explained here:
% https://doi.org/10.1016/j.isci.2023.106201)
%-------------------------------------------------
if isfield(model,'grRules')
rule = model.grRules(model.SConsistentRxnBool);
else
error('grRules is missiming')
end
if ~isfield(model,'SConsistentRxnBool')
error('grRules is missiming')
end

orList =[];
% Divide the rule by "or" and iterate over each resulting subrule
for i = 1:length(rule)
% if ~isempty(rule(i))
subrules = strsplit(strjoin(rule(i)), ' or ');

for subruleIndex = 1:length(subrules)
% Split each subrule by "and" to get a list of genes
genes = strsplit(subrules{subruleIndex}, ' and ');

g_vector = {};

% Process each gene
for geneIndex = 1:length(genes)
gene = strrep(genes{geneIndex}, '(', ''); % Remove "("
gene = strrep(gene, ')', ''); % Remove ")"
gene = strrep(gene, ' ', ''); % Remove spaces
g_vector{geneIndex} = [gene];
end
% g_table = cell2table(g_vector', 'VariableNames', {'GeneID'});

% Evaluate the minimum expression value
if contains(g_vector,'rec1_')
fpkmTable1 = Transcriptomic(:,[1,2]);
values = fpkmTable1{ismember(strrep(cellstr(num2str(fpkmTable1.entrezID)),' ',''), strrep(cellstr(g_vector),'rec1_','')), 2};
elseif contains(g_vector,'rec2_')
fpkmTable2 = Transcriptomic(:,[1,3]);
values = fpkmTable2{ismember(strrep(cellstr(num2str(fpkmTable2.entrezID)),' ',''), strrep(cellstr(g_vector),'rec2_','')), 2};
else
values = Transcriptomic{ismember(strrep(cellstr((Transcriptomic.entrezID)),' ',''), cellstr(g_vector)), 2};

end
value = min(values);

% Apply the threshold
if value < Threshold
value = 0;
end

% Add the minimum to the list
orList = vertcat(orList, value);

% end
end
expList{i,1} = orList';
orList = [];
expList{i,2} = sum(expList{i,1});
% Return the sum of the list
% result = sum(orList);
end


% Access the first and second columns and convert them to numeric arrays
firstColumn = expList(:, 1);
secondColumn = cell2mat(expList(:, 2));
medianValue = median(secondColumn);
% Find empty cells in the first column
isEmptyFirstColumn = cellfun('isempty', firstColumn);

% Replace empty cells in the first column with the corresponding row median from the second column
for i = 1:numel(firstColumn)
if isEmptyFirstColumn(i) & model.SConsistentRxnBool
secondColumn(i) = medianValue;
end
end
geneWeight = secondColumn;
253 changes: 253 additions & 0 deletions src/visualization/entropicFBA/calculateReactionMasses.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
function [substratesMass, productsMass] = calculateReactionMasses(model)
% This function calculates mass related to each reaction, products mass and
% substrates mass,using the left null space of stochiometric matrix,
% useful to unbiased flux through reactions with massive
% metabolites in entropicFBA

% Author: Samira Ranjbar 2024
%% % Check if metFormula field is provied in the model, if not add it using model0(Recon3DModel_301_xomics_input)

if ~isfield(model,'metFormulas')
model0 = readCbModel('Recon3DModel_301_xomics_input.mat');
% Loop through each entry in the first list
for i = 1:numel(model.mets)
% Find the corresponding entry in the second list
index = find(ismember(model0.mets, regexprep(model.mets{i}, ',(rec[12]|comm[12])\]$', ']')));

% If a matching entry is found, update the formula
if ~isempty(index)
model.metFormulas{i,1} = model0.metFormulas{index};
end
end

% Verify the validity of the updated model
if verifyModel(model, 'simpleCheck', true)
% Save the updated model
% writeCbModel(model, 'updated_model.mat');
disp('Model successfully updated and saved.');
else
disp('The updated model is not valid.');
end
end
%% There is a wrong formula for this a metabolite in the generic model
model.metFormulas(findMetIDs(model,model.mets(contains(model.mets,'paps[')))) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[c,rec2]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[g,rec1]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[g,rec2]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[c]')) = {'C10H11N5O13P2S'};
% model.metFormulas(findMetIDs(model,'paps[g]')) = {'C10H11N5O13P2S'};

%% Calculate molecular weights using the computeMW function
metList = model.mets;
[molecularWeights, ~] = computeMW(model, metList);
% model = readCbModel('scRecon3D_2-1.mat')
% Get the number of consistent reactions in the model
numReactions = numel(model.rxns(model.SConsistentRxnBool));
sConsistent = model.S(model.SConsistentMetBool,model.SConsistentRxnBool);
rxnConsistent = model.rxns(model.SConsistentRxnBool);
% Initialize variables to store results
cf = cell(numReactions, 1); % Cell array for substrate mass results
cr = cell(numReactions, 1); % Cell array for product mass results


% Iterate through all reactions
for i = 1:numReactions
% Get row indices and stoichiometric coefficients for the current reaction
[rowIndices, ~, stoichiometry] = find(sConsistent(:, i));

% Identify substrates and products
substrates = model.mets(rowIndices(stoichiometry < 0));
products = model.mets(rowIndices(stoichiometry > 0));

% Calculate mass of substrates and products using molecular weights
substrateCoefficients = stoichiometry(stoichiometry < 0);
substrateMass = abs(substrateCoefficients) .* molecularWeights(ismember(metList, substrates));

productMass = stoichiometry(stoichiometry > 0) .* molecularWeights(ismember(metList, products));

% Store results in cf and cr
cf{i} = struct('reaction', rxnConsistent{i}, 'substrates', substrates, 'mass', substrateMass);
cr{i} = struct('reaction', rxnConsistent{i}, 'products', products, 'mass', productMass);
end
for i= 1:length(cf)
Cf(i) = sum(cf{i}(1).mass);
Cr(i) = sum(cr{i}(1).mass);
end
CF = Cf';
CR = Cr';
%% Check if any mass imbalance happen
j=1;
indexmassimbalance =[];
for i = 1:length(CF)
if( round(CF(i), 2)~= round(CR(i), 2) & ~isnan(CF(i)))
indexmassimbalance(j) = i;
j = j + 1;
end
end
% if isempty(indexmassimbalance)
% disp('All reactions that do not include an R-group are mass-balanced.');
% else
% fprintf('%s is not mass-balance\n', cell2mat(model.rxns(indexmassimbalance)));
% end
if ~isempty(indexmassimbalance)

dataTable = [];

for a = indexmassimbalance%[95, 182, 384, 465, 3090, 3180, 3386, 3487, 4008]
if isfield(model,'rxnFormulas')
Formula = model.rxnFormulas(findRxnIDs(model, cf{a, 1}(1).reaction));
else
Formula = printRxnFormula(model, cf{a, 1}(1).reaction);
end
ForwardMass = sum(cf{a, 1}(1).mass);
ReverseMass = sum(cr{a, 1}(1).mass);

% Accumulate data
dataRow = [a, Formula, ReverseMass, ForwardMass];
dataTable = [dataTable; dataRow];
end

% Create a table after the loop
variableNames = {'rxn number', 'Formula', 'Reverse Mass', 'Forward Mass'};
resultTable = array2table(dataTable, 'VariableNames', variableNames);
disp(resultTable);
else
disp('All reactions that do not include an R-group are mass-balanced.')
end
%% linear programming using lsqnonneg method

N = model.S(model.SConsistentMetBool,model.SConsistentRxnBool);
A = N';
% Objective function: L2 regularization
objective = @(x) sum(x.^2);

% Nonlinear equality constraint: A*x = 0
nonlinearConstraint = @(x) A*x;

% Initial guess for x (make sure it satisfies A*x = 0 and x > 0)
x0 = ones(size(A, 2), 1);

% Non-negative least squares
x = lsqnonneg(A, zeros(size(A, 1), 1));

% Display the result
if(x >= 0)
LeftNullSpace_nonzero = nnz(x)
% figure('Renderer', 'painters', 'Position', [10 10 1600 800])
% bar(x,'FaceColor', [1, 0, 0], 'FaceAlpha',0.5)
% xlabel('met Index', FontSize=14, FontWeight='bold');
% ylabel('Left null space value', FontSize=14, FontWeight='bold');
else
disp('There is no strictly positive left- null space')
end
%% set undifined molecularweight to zero
for i = 1: length(molecularWeights)
if isnan(molecularWeights(i))
molecularWeights(i) = 0;
end
end
T = table((1:length(x))', model.mets, model.metFormulas, molecularWeights, x, 'VariableNames',...
{'ID','met', 'met formula', 'molecular weigth', 'left null space'});
[~, idx] = sortrows(T, {'molecular weigth', 'left null space'}, {'ascend', 'ascend'});
sortedTable = T(idx, :);
var4Values = sortedTable.("molecular weigth");
var5Values = sortedTable.("left null space");

nonzeroIndices = find(var4Values ~= 0);
%% Removing metabolites that contain R-group
y = var4Values(nonzeroIndices(1):end);
x = var5Values(nonzeroIndices(1):end);

%% Detect and remove outliers using IQR method
x_std = std(x);
y_std = std(y);

x_median = median(x);
y_median = median(y);

% Define a threshold for outliers (e.g., 7 times the standard deviation)
threshold = 7;

% Find indices of outliers
outliers_x = find(abs(x - x_median) > threshold * x_std);
outliers_y = find(abs(y - y_median) > threshold * y_std);

% Combine outlier indices
outliers_indices = unique([outliers_x; outliers_y]);

% Remove outliers from the data
x_no_outliers = x;
y_no_outliers = y;
x_no_outliers(outliers_indices) = [];
y_no_outliers(outliers_indices) = [];

% Perform polynomial regression on data without outliers
degree = 1; % Adjust the degree of the polynomial as needed
coefficients_poly = polyfit(x_no_outliers, y_no_outliers, degree);

% Evaluate the polynomial at various x values for plotting
x_fit_poly = linspace(min(x_no_outliers), max(x_no_outliers), 100);
y_fit_poly = polyval(coefficients_poly, x_fit_poly);

%% Plot the original data and the fitted polynomial
figure('Renderer', 'painters', 'Position', [10 10 1600 800])
plot(x, y, 'o', 'DisplayName', 'Original Data');
hold on;

% Plot the data without outliers
plot(x_no_outliers, y_no_outliers, 'x', 'DisplayName', 'Data without Outliers');

% Plot the fitted polynomial
plot(x_fit_poly, y_fit_poly, '-', 'DisplayName', 'Fitted Line');
hold off
legend('Location', 'Best');
xlabel('left null-space');
ylabel('molecularWeights');
title('Polynomial Regression');

%% Display the coefficients
% Construct the polynomial equation as a string
degree = length(coefficients_poly) - 1;
equation_str = 'y = ';
for i = degree:-1:1
equation_str = [equation_str num2str(coefficients_poly(degree - i + 1)) ' * x^' num2str(i) ' + '];
end
equation_str = [equation_str num2str(coefficients_poly(end))];

% Display the polynomial equation
disp('Fitted Polynomial Equation:');
disp(equation_str);

for i = 1:nonzeroIndices(1)-1
var4Values(i) = coefficients_poly(1) * var5Values(i) + coefficients_poly(2);
end
sortedTable.("molecular weigth") = var4Values;
ST=sortrows(sortedTable, {'ID'}, {'ascend'});
%% calculate mass again for metabolite contain R-group using left null space
molecularWeights = ST.("molecular weigth");
for i = 1:numReactions
% Get row indices and stoichiometric coefficients for the current reaction
[rowIndices, ~, stoichiometry] = find(sConsistent(:, i));

% Identify substrates and products
substrates = model.mets(rowIndices(stoichiometry < 0));
products = model.mets(rowIndices(stoichiometry > 0));

% Calculate mass of substrates and products using molecular weights
substrateCoefficients = stoichiometry(stoichiometry < 0);
substrateMass = abs(substrateCoefficients) .* molecularWeights(ismember(metList, substrates));

productMass = stoichiometry(stoichiometry > 0) .* molecularWeights(ismember(metList, products));

% Store results in cf and cr
cf{i} = struct('reaction', rxnConsistent{i}, 'substrates', substrates, 'mass', substrateMass);
cr{i} = struct('reaction', rxnConsistent{i}, 'products', products, 'mass', productMass);
end
for i= 1:length(cf)
Cf(i) = sum(cf{i}(1).mass);
Cr(i) = sum(cr{i}(1).mass);
end
CF = Cf';
CR = Cr';
substratesMass = CF;
productsMass = CR;
Binary file added src/visualization/entropicFBA/tutorial_eFBA.mlx
Binary file not shown.
Loading