-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrenamevcf.cu
155 lines (124 loc) · 4.21 KB
/
renamevcf.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#include "renamevcf.cuh"
/**
 * @brief Looks up a VCF folder in the sample sheet and derives the file and column names.
 *
 * The last path component of @p vcf_Folder is searched for in column 0 of the
 * sample sheet. From the first matching row, column 1 supplies the VCF file name
 * (a trailing ".gz" is stripped) and column 7 supplies the comma separated sample
 * types, which decide whether the tumor column is labelled "_METASTATIC_TUMOR"
 * or "_PRIMARY_TUMOR".
 *
 * The process is terminated with exit(-1) when the sample sheet cannot be opened,
 * when the matching row has too few columns, when no row matches, or when neither
 * a "Metastatic" nor a "Primary Tumor" sample type is present.
 *
 * @param vcf_Folder       Path of the folder that holds the VCF file.
 * @param sample_sheet_vcf Path of the tab or comma delimited sample sheet.
 */
renamevcf::renamevcf(string vcf_Folder, string sample_sheet_vcf)
{
    cout << "Starting TCGA VCF file and creating PLINK phenotype file\n\n";

    vcf_Folder_name = filesystem::path(vcf_Folder).filename().string();
    this->vcf_Folder = vcf_Folder;

    cout << "Processing vcf Folder: " << vcf_Folder_name << endl;
    cout << "Finding folder in sample sheet file\n";

    fstream sample_Sheet;
    sample_Sheet.open(sample_sheet_vcf, ios::in);

    functions_library functions = functions_library();
    string sample_Type = "";

    if (!sample_Sheet.is_open())
    {
        cout << "ERROR: UNABLE TO OPEN SAMPLE SHEET FILE: " << sample_sheet_vcf << endl;
        exit(-1);
    }

    string line;
    vector<string> line_Data;

    // The header row is only used to detect the delimiter: if splitting on tabs
    // yields a single field, the sheet is treated as comma separated.
    getline(sample_Sheet, line);
    char delim = '\t';
    functions.split(line_Data, line, delim);
    if (line_Data.size() == 1)
    {
        delim = ',';
    }

    while (getline(sample_Sheet, line))
    {
        // NOTE(review): relies on split() replacing the previous contents of line_Data — confirm.
        functions.split(line_Data, line, delim);

        if (line_Data.empty() || line_Data[0] != vcf_Folder_name)
        {
            continue;
        }

        // Columns 1 (file name) and 7 (sample type) are read below.
        if (line_Data.size() < 8)
        {
            cout << "ERROR: SAMPLE SHEET ROW FOR " << vcf_Folder_name << " HAS FEWER THAN 8 COLUMNS\n";
            exit(-1);
        }

        cout << "VCF file found\n";
        sample_Type = line_Data[7];

        // Strip a trailing ".gz" only when the name really ends with it.
        // find_last_of(".gz") matches any of the three characters, which
        // truncated names that do not end in ".gz" at the wrong position.
        const string gz_Suffix = ".gz";
        string file_Name = line_Data[1];
        if (file_Name.size() >= gz_Suffix.size() &&
            file_Name.compare(file_Name.size() - gz_Suffix.size(), gz_Suffix.size(), gz_Suffix) == 0)
        {
            file_Name.erase(file_Name.size() - gz_Suffix.size());
        }

        vcf_File_name_only = file_Name;
        vcf_File = vcf_Folder + "/" + file_Name;
        cout << "vcf File location: " << vcf_File << endl;
        break;
    }

    if (sample_Type == "")
    {
        cout << "\nERROR: NO VCF folder match found\n";
        exit(-1);
    }

    vector<string> sample_Types;
    functions.split(sample_Types, sample_Type, ',');

    // True when one of the first two sample types equals the label, with or
    // without a single leading space. Only entries that exist are inspected, so
    // a field holding a single sample type no longer reads past the vector end.
    auto has_Sample_Type = [&sample_Types](const string &label)
    {
        for (size_t index = 0; index < sample_Types.size() && index < 2; index++)
        {
            if (sample_Types[index] == label || sample_Types[index] == " " + label)
            {
                return true;
            }
        }
        return false;
    };

    // Metastatic takes precedence over Primary Tumor when both are listed.
    if (has_Sample_Type("Metastatic"))
    {
        tumor_Column_Name = vcf_Folder_name + "_METASTATIC_TUMOR";
    }
    else if (has_Sample_Type("Primary Tumor"))
    {
        tumor_Column_Name = vcf_Folder_name + "_PRIMARY_TUMOR";
    }
    else
    {
        cout << "No tumor data found for the VCF\n";
        exit(-1);
    }
}
void renamevcf::ingress()
{
if (filesystem::exists(vcf_Folder + "/renamed_" + vcf_File_name_only))
{
cout << "Already exists\n";
}
else
{
cout << "\nReading and renaming vcf files\n";
fstream vcf_File_rename;
fstream vcf_File_original;
vcf_File_original.open(vcf_File, ios::in);
functions_library functions = functions_library();
if (vcf_File_original.is_open())
{
vcf_File_rename.open(vcf_Folder + "/renamed_" + vcf_File_name_only, ios::out);
string line;
while (getline(vcf_File_original, line))
{
if (line.substr(0, 2) == "##")
{
vcf_File_rename << line << endl;
}
else
{
break;
}
}
vector<string> line_Data;
functions.split(line_Data, line, '\t');
for (int col = 0; col < 9; col++)
{
vcf_File_rename << line_Data[col] << "\t";
}
if (line_Data[9] == "NORMAL")
{
vcf_File_rename << vcf_Folder_name << "_NORMAL\t" << tumor_Column_Name;
}
else
{
vcf_File_rename << tumor_Column_Name << "\t" << vcf_Folder_name << "_NORMAL";
}
vcf_File_rename << endl;
while (getline(vcf_File_original, line))
{
vcf_File_rename << line << endl;
}
cout << "Renaming VCF columns completed: ";
vcf_File_rename.close();
vcf_File_original.close();
cout << vcf_Folder << "/renamed_" << vcf_File_name_only << endl;
}
else
{
cout << "ERROR: UNABLE TO OPEN FILE: " << vcf_File << endl;
}
}
}