forked from kennethbruskiewicz/Sleep_Apnea_WF2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gene_freq.py
48 lines (38 loc) · 1.18 KB
/
gene_freq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import sys
import os
import csv
#Threshold used when none is given or the given one is invalid
_DEFAULT_FREQ_THRESH = 0.8
_USAGE = "ERROR! Usage: python3 gene_freq.py <input_file> <number_of_samples> [frequency_threshold]"


def _parse_threshold(text):
    """Return *text* as a frequency threshold, or the default if it is invalid.

    A threshold is valid when it parses as a float in the range [0, 1].
    """
    try:
        value = float(text)
    except ValueError:
        return _DEFAULT_FREQ_THRESH
    #The negated range test also rejects NaN, which fails every comparison
    if not 0 <= value <= 1:
        return _DEFAULT_FREQ_THRESH
    return value


def _read_gene_counts(path):
    """Read a two-column CSV (gene, count) into a dict of gene -> int count.

    Blank lines are skipped. If a gene appears more than once, the last
    row wins. Raises ValueError if a count is not an integer and IndexError
    if a non-blank row has fewer than two columns.
    """
    #newline='' is what the csv module expects for files it parses itself
    with open(path, 'r', newline='') as handle:
        return {row[0]: int(row[1]) for row in csv.reader(handle) if row}


def _select_frequent(gene_counts, n, freq_thresh):
    """Return the genes whose count / n is at least freq_thresh."""
    return {gene: count for gene, count in gene_counts.items()
            if count / n >= freq_thresh}


def main(argv=None):
    """Filter a gene-count CSV down to the genes that meet a frequency threshold.

    argv has the same layout as sys.argv (program name first) and defaults
    to sys.argv:
        argv[1]  path to the input CSV of gene,count rows
        argv[2]  number of samples (positive integer) used as the denominator
        argv[3]  optional frequency threshold in [0, 1]; defaults to 0.8 when
                 missing or invalid

    The result is written to gene_thresh_<threshold>.csv in the same
    directory as the input file. Returns 0 on success and 1 when the
    arguments are unusable.
    """
    args = sys.argv if argv is None else argv

    if len(args) < 3:
        #No input file or sample count specified: stop instead of falling
        #through to code that needs them
        print(_USAGE)
        return 1

    infile_path = args[1]

    try:
        n = int(args[2])
    except ValueError:
        print("ERROR! <number_of_samples> must be an integer, got: " + args[2])
        return 1
    if n <= 0:
        #A zero sample count would divide by zero; a negative one is meaningless
        print("ERROR! <number_of_samples> must be greater than 0, got: " + args[2])
        return 1

    if len(args) > 3:
        freq_thresh = _parse_threshold(args[3])
    else:
        freq_thresh = _DEFAULT_FREQ_THRESH

    gene_counts = _read_gene_counts(infile_path)
    freq_genes = _select_frequent(gene_counts, n, freq_thresh)

    #Write next to the input file; dirname is '' for a bare file name, which
    #makes os.path.join fall back to the current directory
    outfile = os.path.join(os.path.dirname(infile_path),
                           "gene_thresh_" + str(freq_thresh) + ".csv")
    with open(outfile, 'w', newline='') as handle:
        csv.writer(handle).writerows(freq_genes.items())
    return 0


if __name__ == "__main__":
    sys.exit(main())