-
Notifications
You must be signed in to change notification settings - Fork 2
/
11_requireUndiagnosed.pl
executable file
·124 lines (101 loc) · 3.77 KB
/
11_requireUndiagnosed.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/perl
############################################################################################
# Copyright (C) Nicolas Thierry-Mieg, 2019-2024
#
# This file is part of grexome-TIMC-Secondary, written by Nicolas Thierry-Mieg
# (CNRS, France) [email protected]
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with this program.
# If not, see <https://www.gnu.org/licenses/>.
############################################################################################
# 25/03/2020
# NTM
# Takes 2 arguments: $inDir $outDir
# - $inDir must contain cohort TSVs as produced by extractCohorts.pl,
# possibly filtered and reordered by filterVariants.pl and reorderColumns.pl,
# possibly with patientIDs;
# - $outDir doesn't exist, it will be created and filled with one TSV
# per infile named $cohort.final.csv
#
# The outfiles are identical to the infiles except we remove lines where
# COUNT_$cohort_HV == 0 AND COUNT_$cohort_HET == 0 : previous steps
# kept such lines if they had COUNT_*_OTHERCAUSE_* >=1, this was
# necessary for extractSamples (for samples with a casual gene).
use strict;
use warnings;
use File::Basename qw(basename);
use POSIX qw(strftime);
# we use $0 in every stderr message but we really only want
# the program name, not the path
$0 = basename($0);
(@ARGV == 2) || die "E: $0 - needs 2 args: an inDir and a non-existant outDir\n";
my ($inDir, $outDir) = @ARGV;
(-d $inDir) ||
die "E: $0 - inDir $inDir doesn't exist or isn't a directory\n";
opendir(INDIR, $inDir) ||
die "E: $0 - cannot opendir inDir $inDir\n";
(-e $outDir) &&
die "E: $0 - found argument outDir $outDir but it already exists, remove it or choose another name.\n";
mkdir($outDir) || die "E: $0 - cannot mkdir outDir $outDir\n";
#########################################################
my $now = strftime("%F %T", localtime);
warn "I $now: $0 - starting to run\n";
while (my $inFile = readdir(INDIR)) {
($inFile =~ /^\./) && next;
my $cohort;
if ($inFile =~ /^(\w+)\..*csv$/) {
$cohort = $1;
}
else {
warn "W: $0 - cannot parse filename of inFile $inFile, skipping it\n";
next;
}
open(INFILE, "$inDir/$inFile") ||
die "E: $0 - cannot open infile $inDir/$inFile\n";
my $outFile = "$cohort.final.csv" ;
open(OUTFILE, "> $outDir/$outFile") ||
die "E: $0 - cannot open outfile $outDir/$outFile: $!\n";
###################################
# header line
my $header = <INFILE>;
chomp($header);
my @headers = split(/\t/,$header);
# columns of COUNT_$cohort_HV and HET
my ($hvCol,$hetCol) = (-1,-1);
foreach my $hi (0..$#headers) {
if ($headers[$hi] eq "COUNT_$cohort"."_HV") {
$hvCol = $hi;
}
elsif ($headers[$hi] eq "COUNT_$cohort"."_HET") {
$hetCol = $hi;
}
}
# sanity check
(($hvCol >= 0) && ($hetCol >= 0)) ||
die "E: $0 couldn't find one of HV/HET for $cohort\n";
print OUTFILE "$header\n";
# data lines
while (my $line = <INFILE>) {
chomp($line);
my @fields = split(/\t/, $line, -1) ;
if (($fields[$hvCol] == 0) && ($fields[$hetCol] == 0)) {
next;
}
else {
print OUTFILE "$line\n";
}
}
close(INFILE);
close(OUTFILE);
}
closedir(INDIR);
$now = strftime("%F %T", localtime);
warn "I $now: $0 - ALL DONE, completed successfully!\n";