-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathevaluate_objective_simple_EDGE.m
136 lines (101 loc) · 9.13 KB
/
evaluate_objective_simple_EDGE.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
function f = evaluate_objective(x, M, V, fbarecon, geni, reaction_expression, pos_genes_in_react_expr, ixs_geni_sorted_by_length)
% gene_importance decide l'intensita' che il numero di gene expression (normalizzato
% a 1) ha sui lower e upper bound dei flussi.
%% function f = evaluate_objective(x, M, V)
% Function to evaluate the objective functions for the given input vector
% x. x is an array of decision variables and f(1), f(2), etc are the
% objective functions. The algorithm always minimizes the objective
% function hence if you would like to maximize the function then multiply
% the function by negative one. M is the numebr of objective functions and
% V is the number of decision variables.
%
% This functions is basically written by the user who defines his/her own
% objective function. Make sure that the M and V matches your initial user
% input.
yt=x'; % x' is the transpose of x, that is the gene expression array associated with the child (two codon usages)
yt=yt(1:V); % needed because sometimes, and especially with child_3 in genetic_operator, there is a bug and all the child is passed to this function here, including the final rank, crowding distance and objective functions (instead of passing only the V decision variables)
%fbarecon.present=ones(fbarecon.nrxn,1);
%fbamodel.present = ~(fbamodel.G' * yt); %istruzione vecchio approccio knockout
%NEW CODE: DATA UN GENE EXPRESSION ARRAY, CALCOLA AUTOMATICAMENTE GLI OUTPUT DELL'FBA
%load('geni.mat');
%load('reaction_expression.mat');
%geni( cellfun(@isempty, reaction_expression) ) = {'1'}; %replaces all the empty names of genes with the name '1'
%reaction_expression( cellfun(@isempty, reaction_expression) ) = {'1'}; %replaces all the empty cells of gene expression (e.g. exchange reactions) with '1', i.e. gene expressed nomally
%reaction_expression = cellfun(@(c) c{1},reaction_expression);
%geni = cellfun(@(c) c{1},geni);
eval_reaction_expression = reaction_expression;
%indices are sorted by length of their string, so the longest get replacesd first. This avoids, for example, that if we have two genes 123.1 and 23.1, the substring '23.1' is replaced in both. If we first replace the longest and finally the shortest, this problem is avoided
for i=ixs_geni_sorted_by_length %loop over the array of the non-1 gene expressions, in order to replace the names of genes in geni_reazioni.mat with their values. All the gene set expressions with only 1s as gene values , will be left empty and at the end of this loop everything empty will be substituted with 1 anyway. This avoids looping over all the genes yt, which is very expensive
posizioni_gene = pos_genes_in_react_expr{i};
for j=1:length(posizioni_gene) %for each string of reaction_expression, we replace the substring 'bXXXX' with the number representing its gene expression
eval_reaction_expression{posizioni_gene(j)} = strrep(eval_reaction_expression{posizioni_gene(j)}, geni{i}, num2str(yt(i),'%.15f')); %Matlab strangely truncates decimal digits when using num2str. Addimg %.12f at least ensures that 12 decimal digits are included in the number converted into string
end
end
eval_reaction_expression( cellfun(@isempty, eval_reaction_expression) ) = {'1.0'}; %replaces all the empty cells of gene expression (e.g. exchange reactions) with 1, i.e. gene expressed nomally
% for i=1:numel(eval_reaction_expression)
% for j=1:numel(geni) %in the eval_reaction_expression in which already all the non-1 genes have been substituted with their values, we now have to substitute all the remaining genes with the number 1
% eval_reaction_expression{i} = strrep(eval_reaction_expression{i}, ['/' geni{j} '/'], '1.0'); %we do this job only for the non-1 reaction expressions, so we save a lot of computational time. Note that we need '1.0' because the regecpr below is looking for NUM.NUM strings, and therefore putting only '1' will give an infinite loop
% end
% end
%eval_reaction_expression( cellfun(@isempty, eval_reaction_expression) ) = {'1'}; %replaces all the empty cells of gene expression (e.g. exchange reactions or reactions whose genes have all gene expression 1) with 1, i.e. gene expressed nomally
num_reaction_expression = zeros(1,length(eval_reaction_expression));
gamma = zeros(1,length(reaction_expression));
for i=1:length(num_reaction_expression)
str = eval_reaction_expression{i};
num_parenthesis = numel(strfind(str,')'));
while (num_parenthesis > 32) %if there are more than 32 parentheses, matlab is unable to run EVAL. So we need to reduce these parentheses manually by starting to eval smaller pieces of the string
to_replace = 'min.\d*+\.+\d*,\d*+\.+\d*.|max.\d*+\.+\d*,\d*+\.+\d*.|min..\d*+\.+\d*.,\d*+\.+\d*.|max..\d*+\.+\d*.,\d*+\.+\d*.|min..\d*+\.+\d*.,.\d*+\.+\d*..|max..\d*+\.+\d*.,.\d*+\.+\d*..|min.\d*+\.+\d*,.\d*+\.+\d*..|max.\d*+\.+\d*,.\d*+\.+\d*..'; %searches for all the strings of kind min(NUM.NUM,NUM.NUM) or max(NUM.NUM,NUM.NUM) or min((NUM.NUM),NUM.NUM) or max((NUM.NUM),NUM.NUM) or min((NUM.NUM),(NUM.NUM)) or max(NUM.NUM,(NUM.NUM)) or min(NUM.NUM,(NUM.NUM)) or max((NUM.NUM),(NUM.NUM))
substrings_to_replace = regexp(str, to_replace, 'match');
if isempty(substrings_to_replace)
num_parenthesis = 0; %if num_parenthesis > 32 and there is nothing that can be replaced with regexp, we force this, in order to avoid an endless loop. Later, eval will catch an exception as it cannot evaluate when num_parenthesis>32
else
for j = 1:numel(substrings_to_replace)
ss_rep = substrings_to_replace{j};
str = strrep(str,ss_rep,num2str(eval(ss_rep),'%.15f'));
end
num_parenthesis = numel(strfind(str,')'));
end
end
str = regexprep(str,'/','');
% try
num_reaction_expression(i) = eval(str); %evaluates the cells like they are numerical expressions (so as to compute min and max of gene expressions)
% catch
% %i
% warning(['Problem using eval for reaction_expression ' num2str(i) '. Assigning a value of 1.']);
% num_reaction_expression(i) = 1;
% end
% match_geneset = find(yt == num_reaction_expression(i)); %this instruction finds the location of the resultant geneset expression in the gene expression array, so as to understand the gene responsible for this geneset expression
%
% if length(match_geneset) > 1
% gamma(i) = 1; %in this case, there are multiple matches, which meansthe expression was exactly 1, which means almost surely that the gene in the model was not present in the probe genes, and therefore its expression and its variance must be set to 1
% else
% gamma(i) = gene_importance(match_geneset);
% end
end
%something_wrong = find(num_reaction_expression > 10); %these are the cells where something has gone wrong for sure
gamma = 3.*ones(1,length(reaction_expression));
for i=1:length(num_reaction_expression) %loop over the array of the geneset expressions
fbarecon.lb(i) = fbarecon.lb(i)*num_reaction_expression(i)^gamma(i);
fbarecon.ub(i) = fbarecon.ub(i)*num_reaction_expression(i)^gamma(i);
% if num_reaction_expression(i)>=1
% fbarecon.lb(i) = fbarecon.lb(i)*(1+gamma(i)*log(num_reaction_expression(i)));
% fbarecon.ub(i) = fbarecon.ub(i)*(1+gamma(i)*log(num_reaction_expression(i)));
% else
% fbarecon.lb(i) = fbarecon.lb(i)/(1+gamma(i)*abs(log(num_reaction_expression(i))));
% fbarecon.ub(i) = fbarecon.ub(i)/(1+gamma(i)*abs(log(num_reaction_expression(i))));
% end
end
%END NEW CODE
%[v1, fmax, fmax_max] = flux_balance_trilevel(fbarecon,true);
[v1, fmax, fmin] = flux_balance(fbarecon,true);
% objective functions number M is 2
f(1) = fbarecon.f' * v1; % Biomass
%f(2) = fmax; % max of the 1st synthetic obj
%f(3) = sum(abs(round(yt*1e15)/1e15-ones(numel(yt),1))); % !!!! change also in expFBA at the beginning for the initial population !!!!! (the 1e15 rounding allows to truncate at the 15th decimal digit so as to avoid nonzero very small values due to the loss of precision of matlab when subtracting two arrays)
format longG; format compact;
%f
f(1) = -f(1); %put - if we maximise, + if we minimise (because nsga is by default minimising the objectives, so the mminus sign means it will minimise the negative objective, which means it will maximise the objective
%f(2) = -f(2); %if the second objective is lactate dehygrogenase written the other way round (and called 'NAD+ oxidoreductase'), we put + because, for cancer cells, we minimize the flux 'nad[x] + lac_L[x] <=> h[x] + nadh[x] + pyr[x]' which needs to be negative because we need to go from pyruvate to lactate!
%f(3) = f(3); % !!!! change also in expFBA at the beginning for the initial population !!!!! (NOT NEEDED IN THE NEW VERSION WHERE I EVALUATE ALSO THE INITIAL INDIVIDUALS USING EVALUATE_OBJECTIVE
f
%[concentrationMatrix,excRxnNames,timeVec,biomassVec] = my_dynamicFBA_antibiotic(fbamodel,substrateRxns,initConcentrations,initBiomass,timeStep,nSteps,plotRxns);