-
Notifications
You must be signed in to change notification settings - Fork 1
/
MethSearchRoadMapByLoc.pl
148 lines (134 loc) · 4.16 KB
/
MethSearchRoadMapByLoc.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/perl -w
use strict;
use Cwd;
use Statistics::Basic qw(:all);
# Check the Methylation Status for Regions in All Human Normal Tissues
# Contact: Shicheng Guo([email protected])
# Esophagus=23
# Version 1.3
# Update: 2016-05-12
# ---------------------------------------------------------------------------
# Main script.
#
# Arguments (all required, positional):
#   TargetBed  whitespace-separated file; first three columns are chr, start, end
#   Prefix     output is written to "<Prefix>.fm.rlt.txt"
#   GAP        number of bases added on each side of every region
#
# Inputs read from the current directory:
#   header     column labels; echoed tab-joined as the first output line and
#              used to name the value columns of the .fm files
#   <chr>.fm   one file per chromosome; column 0 is a position, columns 1..N
#              are per-sample values
#
# For every region, rows whose position lies in [start-GAP, end+GAP] are
# collected and the per-sample median is written as one output row:
#   chr:start-end <TAB> median(sample 1) <TAB> median(sample 2) ...
# Regions with no matching row produce no output line.
# ---------------------------------------------------------------------------
if (@ARGV < 3) {
    USAGE();
    die "Error: expected 3 arguments (TargetBed Prefix GAP), got "
        . scalar(@ARGV) . "\n";
}

my $input  = shift @ARGV;
my $header = "header";
my $OUTPUT = shift @ARGV;
my $gap    = shift @ARGV;

# --- Read the target regions ------------------------------------------------
# Regions are kept as [label, chr, start, end] so the coordinates never have
# to be re-parsed out of the label (which would break on contig names that
# contain ':', '-' or '.').
open(my $bed_fh, '<', $input) or die "cannot open $input: $!";
my @regions;
while (my $bed_line = <$bed_fh>) {
    next if $bed_line =~ /^\s*$/;
    my ($chr, $start, $end) = split ' ', $bed_line;
    unless (defined $end) {
        warn "skipping malformed line $. of $input (need chr, start, end)\n";
        next;
    }
    push @regions, ["$chr:$start-$end", $chr, $start, $end];
}
close $bed_fh;

# --- Read the column labels and start the output file -----------------------
open(my $hdr_fh, '<', $header) or die "cannot open $header: $!";
my $out_file = "$OUTPUT.fm.rlt.txt";
open(my $out_fh, '>', $out_file) or die "cannot write $out_file: $!";
my @header;
while (my $hdr_line = <$hdr_fh>) {
    next if $hdr_line =~ /^\s+$/;
    chomp $hdr_line;
    # If the file holds several non-blank lines each one is echoed, and the
    # last one supplies the labels used below.
    @header = split /\s+/, $hdr_line;
    print {$out_fh} join("\t", @header), "\n";
}
close $hdr_fh;

# --- Scan the per-chromosome files ------------------------------------------
foreach my $region (@regions) {
    my ($cor, $chr, $start, $end) = @$region;
    my $lower = $start - $gap;
    my $upper = $end + $gap;

    # 'or' (not '||') so that a failed open really aborts the run.
    open(my $fm_fh, '<', "$chr.fm") or die "cannot open $chr.fm: $!";

    # $data{sample label}{position} = value
    my %data;
    while (my $fm_line = <$fm_fh>) {
        chomp $fm_line;
        my @line = split /\s+/, $fm_line;
        # Ignore blank lines and anything whose first column is not a position.
        next unless defined $line[0] && $line[0] =~ /^\d+$/;
        my $pos = $line[0];

        # Stop only once past the gap-extended end, so the right-hand flank is
        # collected as completely as the left-hand one.
        # Assumes the .fm rows are sorted by ascending position - TODO confirm.
        last if $pos > $upper;
        next if $pos < $lower;

        # NOTE(review): value column $i is labelled with $header[$i], i.e.
        # $header[0] is taken to label the position column - confirm against
        # the actual layout of the header file.
        foreach my $i (1 .. $#line) {
            $data{ $header[$i] }{$pos} = $line[$i];
        }
    }
    close $fm_fh;

    next unless %data;

    # NOTE(review): values are emitted in lexical order of the sample labels,
    # which lines up with the echoed header only if the header is sorted the
    # same way - confirm.
    print {$out_fh} $cor;
    foreach my $sam (sort keys %data) {
        my @fm  = values %{ $data{$sam} };
        my $avg = median(@fm);
        print {$out_fh} "\t$avg";
    }
    print {$out_fh} "\n";
}
close($out_fh) or die "error closing $out_file: $!";
# Print the command-line synopsis followed by three reference sections (an
# example TargetBed file, the Roadmap download commands and the sample table)
# to STDOUT. Takes no arguments; returns the result of the final print.
sub USAGE{
    # Only the synopsis interpolates ($0 is the script name); the reference
    # sections are literal text.
    my $synopsis = "\nUsage: perl $0 TargetBed Prefix GAP\n"
        . "Check the Methylation Status for Regions in All Human Normal Tissues\n";

    my $bed_format = <<'END_BED_FORMAT';

Format For: TargetBed:
chr12 95942750 95942970
chr17 43339200 43339400
chr6 110678910 110679110
chr6 133562350 133562550
chr8 67344530 67344730
chr16 51184245 51184465
END_BED_FORMAT

    my $downloads = <<'END_DOWNLOADS';

Roadmap Dataset Download:
wget ftp://ftp.bcgsc.ca/public/mbilenky/112epigenomes/5mC/SBS_Removed_E027_E064_Fixed_E012/EG.mnemonics.name.xls
wget ftp://ftp.bcgsc.ca/public/mbilenky/112epigenomes/5mC/SBS_Removed_E027_E064_Fixed_E012/FractionalMethylation.tar.gz
wget ftp://ftp.bcgsc.ca/public/mbilenky/112epigenomes/5mC/SBS_Removed_E027_E064_Fixed_E012/FractionalMethylation.tar.gz.md5sum
wget ftp://ftp.bcgsc.ca/public/mbilenky/112epigenomes/5mC/SBS_Removed_E027_E064_Fixed_E012/header
wget ftp://ftp.bcgsc.ca/public/mbilenky/112epigenomes/5mC/SBS_Removed_E027_E064_Fixed_E012/ReadCoverage.tar.gz
wget ftp://ftp.bcgsc.ca/public/mbilenky/112epigenomes/5mC/SBS_Removed_E027_E064_Fixed_E012/ReadCoverage.tar.gz.md5sum
END_DOWNLOADS

    my $sample_table = <<'END_SAMPLE_TABLE';

FM file Header:
E003 ESC.H1 H1_Cell_Line
E004 ESDR.H1.BMP4.MESO H1_BMP4_Derived_Mesendoderm_Cultured_Cells
E005 ESDR.H1.BMP4.TROP H1_BMP4_Derived_Trophoblast_Cultured_Cells
E006 ESDR.H1.MSC H1_Derived_Mesenchymal_Stem_Cells
E007 ESDR.H1.NEUR.PROG H1_Derived_Neuronal_Progenitor_Cultured_Cells
E008 ESC.H9 H9_Cell_Line
E011 ESDR.CD184.ENDO hESC_Derived_CD184+_Endoderm_Cultured_Cells
E012 ESDR.CD56.ECTO hESC_Derived_CD56+_Ectoderm_Cultured_Cells
E013 ESDR.CD56.MESO hESC_Derived_CD56+_Mesoderm_Cultured_Cells
E016 ESC.HUES64 HUES64_Cell_Line
E017 LNG.IMR90 IMR90_Cell_Line
E021 IPSC.DF.6.9 iPS_DF_6.9_Cell_Line
E022 IPSC.DF.19.11 iPS_DF_19.11_Cell_Line
E024 ESC.4STAR 4star
E050 BLD.MOB.CD34.PC.F Mobilized_CD34_Primary_Cells_Female
E053 BRN.CRTX.DR.NRSPHR Neurosphere_Cultured_Cells_Cortex_Derived
E054 BRN.GANGEM.DR.NRSPHR Neurosphere_Cultured_Cells_Ganglionic_Eminence_Derived
E058 SKIN.PEN.FRSK.KER.03 Penis_Foreskin_Keratinocyte_Primary_Cells_skin03
E065 VAS.AOR Aorta
E066 LIV.ADLT Adult_Liver
E070 BRN.GRM.MTRX Brain_Germinal_Matrix
E071 BRN.HIPP.MID Brain_Hippocampus_Middle
E079 GI.ESO Esophagus
E084 GI.L.INT.FET Fetal_Intestine_Large
E085 GI.S.INT.FET Fetal_Intestine_Small
E094 GI.STMC.GAST Gastric
E095 HRT.VENT.L Left_Ventricle
E096 LNG Lung
E097 OVRY Ovary
E098 PANC Pancreas
E100 MUS.PSOAS Psoas_Muscle
E104 HRT.ATR.R Right_Atrium
E105 HRT.VNT.R Right_Ventricle
E106 GI.CLN.SIG Sigmoid_Colon
E109 GI.S.INT Small_Intestine
E112 THYM Thymus
E113 SPLN Spleen
END_SAMPLE_TABLE

    # One print emits the four pieces back to back, in the original order.
    return print $synopsis, $bed_format, $downloads, $sample_table;
}