-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmpca
123 lines (111 loc) · 4.06 KB
/
mpca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long 'HelpMessage';
use List::MoreUtils qw(firstidx);
my $usage=<<_EOH_;;
------------------------------------------------------------------------------------------
This script is used to output the PCA plot from multiple reads number matrixs based on all genes in your matrix
print command to a Rscript and then run it to output the plot
Usage:
mpca --matrix Blenny_control_read_nb.xls Blue_eyed_control_read_nb.xls Common_control_read_nb.xls Yaldwyn_control_read_nb.xls \
--samples coldata_Blenny_control.txt coldata_Blue_eyed_control.txt coldata_Common_control.txt coldata_Yaldwyn_control.txt \
--column Site_1 Site_1 Site_1 Site_1 \
--title Blenny Blue_eyed Common Yaldwyn \
--label \
--prefix total_pca_all_gene
Example:
1. --matrix: Blenny_read_nb.xls
B61 B62 B63 B64 B65 B66 B67 B68 B69 B71 B72 B73 B74 B75 B76 B77 B78 B79
OG0038649 430 218 222 486 402 612 266 159 278 334 365 190 614 464 346 543 477 490
OG0039547 0 1 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0
2. --samples: coldata_Blenny.txt
Site_1 Site_2 Species
B61 Vn Vent Blenny
B62 Vn Vent Blenny
3. --label: use this parameter to whether have the label of the indviduals in the plot
Kang 2021-8-24
------------------------------------------------------------------------------------------
_EOH_
;
GetOptions(
'matrix:s{1,}', \my @matrix, # the overall reads number matrix
'samples:s{1,}', \ my @sample_file, # the sample information
'column:s{1,}', \my @col, # the column selected from the sample information
'label', \my $label, # make a decision that have the label of the individuals in the plot
'title:s{1,}', \my @title, # the title of each plot
'prefix:s', \my $prefix, # the name of output pdf
'help', \ my $help
);
if ($help || (! @matrix) && (! @sample_file) || (!@col) || (!@title) || (!$prefix) ) {
die $usage; # all of these options are mandatory requirements
}
if ($label) {
$label="all";
}
main: {
&creat_R_header();
my $nb=@matrix;
for (my $i = 0; $i < $nb; $i++) {
my $j=$i+1;
my $p="p".$j;
my $matrix=$matrix[$i];
my $sample_file=$sample_file[$i];
my $title=$title[$i];
my $col=$col[$i];
&creat_R_common($matrix, $sample_file, $col, $label, $title, $p);
}
&plot_arrange();
exit(0);
}
###################################################################################
sub creat_R_header {
open PCA, ">pca.R" or die "can not creat pca.R\n";
print PCA "library(ade4)\n";
print PCA "library(factoextra)\n";
print PCA "library(magrittr)\n";
print PCA "library(ggpubr)\n";
close PCA;
}
sub creat_R_common {
my ($matrix, $sample_file, $col, $label, $title, $p)=@_;
open PCA, ">>pca.R" or die "can not creat pca.R\n";
print PCA "cts <- read.table(file=\"$matrix\",header=TRUE,row.names = 1)\n";
print PCA "data2 <- as.data.frame(t(cts))\n";
print PCA "coldata <- read.table(file=\"$sample_file\")\n";
print PCA "res.pca <- dudi.pca(data2, scannf = FALSE, nf = 5)\n";
print PCA "groups <- as.factor(coldata\$$col)\n";
my $para;
$label?($para="res.pca, label=\"$label\", col.ind = groups, palette = \"Dark2\", "):($para="res.pca, abel=\"\",col.ind = groups, palette = \"Dark2\", ");
$para.="addEllipses = T, ellipse.type = \"confidence\",legend.title = \"\", ";
$para.="repel = TRUE, title = \"$title\"";
print PCA "$p<-fviz_pca_ind($para)\n";
close PCA;
}
sub plot_arrange {
open PCA, ">>pca.R" or die "can not creat PCA.R\n";
my $output=$prefix.".pdf";
print PCA "pdf(file = \"$output\", width = 11.69, height = 8.27, onefile=FALSE)\n";
my $plot_nb=@matrix;
my ($nrow, $ncol);
if ($plot_nb==1) {
$nrow=1; $ncol=1;
} elsif ($plot_nb==2) {
$nrow=1; $ncol=2;
} elsif ($plot_nb==3 || $plot_nb==4) {
$nrow=2; $ncol=2;
} elsif ($plot_nb==5 || $plot_nb==6) {
$nrow=2; $ncol=3;
}
my $plot_names;
for (my $i = 1; $i <= $plot_nb; $i++) {
$plot_names.="p".$i.", ";
}
$plot_names=~s/\, $//;
my $ggarrange="ggarrange($plot_names,nrow=$nrow, ncol=$ncol,common.legend = T,align = \"h\")";
print PCA "$ggarrange\n";
print PCA "dev.off()";
close PCA;
`R --no-save --no-restore --no-site-file --no-init-file -q < pca.R 1>&2`;
`rm pca.R`;
}