-
Notifications
You must be signed in to change notification settings - Fork 3
/
panda_run.m
122 lines (109 loc) · 4.2 KB
/
panda_run.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
%% Using PANDA to infer gene regulatory network.
%%
%% 1. Reading in input data (expression data, motif prior, TF PPI data)
%% 2. Computing coexpression network
%% 3. Normalizing networks
%% 4. Running PANDA algorithm
%% 5. Writing out PANDA network (optional)
%%
%% Authors: cychen, marieke, kglass
%% Script adapted from Marieke's pLIONESSpcss.m, modified to run PANDA only.
disp(datestr(now));
%% ============================================================================
%% Set Program Parameters and Path
%% ============================================================================
% Run configuration to set parameter first (e.g., run('panda_config.m');)
fprintf('Input expression file: %s\n', exp_file);
fprintf('Input motif file: %s\n', motif_file);
fprintf('Input PPI file: %s\n', ppi_file);
fprintf('Output PANDA network: %s\n', panda_out);
fprintf('Output temp data: %s\n', save_temp);
fprintf('Alpha: %.2f\n', alpha);
addpath(lib_path);
%% ============================================================================
%% Read in Data
%% ============================================================================
disp('Reading in expression data!');
tic
fid = fopen(exp_file, 'r');
headings = fgetl(fid);
n = length(regexp(headings, '\t'));
frewind(fid);
%Exp = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t', 'CommentStyle', '#');
Exp = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t'); % tiny speed-up by not checking for comments
fclose(fid);
GeneNames = Exp{1};
Exp = cat(2, Exp{2:end});
[NumGenes, NumConditions] = size(Exp);
fprintf('%d genes and %d conditions!\n', NumGenes, NumConditions);
Exp = Exp'; % transpose expression matrix from gene-by-sample to sample-by-gene
toc
disp('Reading in motif data!');
tic
[TF, gene, weight] = textread(motif_file, '%s%s%f');
TFNames = unique(TF);
NumTFs = length(TFNames);
[~,i] = ismember(TF, TFNames);
[~,j] = ismember(gene, GeneNames);
RegNet = zeros(NumTFs, NumGenes);
RegNet(sub2ind([NumTFs, NumGenes], i, j)) = weight;
fprintf('%d TFs and %d edges!\n', NumTFs, length(weight));
toc
disp('Reading in ppi data!');
tic
TFCoop = eye(NumTFs);
if(~isempty(ppi_file))
[TF1, TF2, weight] = textread(ppi_file, '%s%s%f');
[~,i] = ismember(TF1, TFNames);
[~,j] = ismember(TF2, TFNames);
TFCoop(sub2ind([NumTFs, NumTFs], i, j)) = weight;
TFCoop(sub2ind([NumTFs, NumTFs], j, i)) = weight;
fprintf('%d PPIs!\n', length(weight));
end
toc
% Clean up variables to release memory
clear headings n GeneNames TF gene TF1 TF2 weight;
%% ============================================================================
%% Run PANDA
%% ============================================================================
disp('Computing coexpresison network:');
tic; GeneCoReg = Coexpression(Exp); toc;
disp('Normalizing Networks:');
tic
RegNet = NormalizeNetwork(RegNet);
GeneCoReg = NormalizeNetwork(GeneCoReg);
TFCoop = NormalizeNetwork(TFCoop);
toc
if ~isempty(save_temp)
disp('Saving the transposed expression matrix and normalized networks:');
if ~exist(save_temp, 'dir')
mkdir(save_temp);
end
tic
save(fullfile(save_temp, 'expression.transposed.mat'), 'Exp', '-v7.3'); % 2G+
save(fullfile(save_temp, 'motif.normalized.mat'), 'RegNet', '-v6'); % fast
save(fullfile(save_temp, 'ppi.normalized.mat'), 'TFCoop', '-v6'); % fast
toc
end
clear Exp; % Clean up Exp to release memory (for low-memory machine)
disp('Running PANDA algorithm:');
AgNet = PANDA(RegNet, GeneCoReg, TFCoop, alpha);
%% ============================================================================
%% Saving PANDA network output
%% ============================================================================
if ~isempty(panda_out)
disp('Saving PANDA network!');
tic
[pathstr, name, ext] = fileparts(panda_out);
switch ext
case '.txt'
save(panda_out, 'AgNet', '-ascii');
case '.tsv'
save(panda_out, 'AgNet', '-ascii', '-tabs');
otherwise
save(panda_out, 'AgNet', '-v6');
end
toc
end
disp('All done!');
disp(datestr(now));