-
Notifications
You must be signed in to change notification settings - Fork 2
/
msmc2ms.py
executable file
·58 lines (47 loc) · 2.06 KB
/
msmc2ms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
# takes a PSMC' (msmc) output file as input and outputs an input file for ms
def rho(prefix):
'''finds the estimated mutation and recombination rates and returns the ratio'''
with open(prefix + '.log', 'r') as infile:
for line in infile:
if line.startswith('mutationRate'):
m0 = float(line.split()[-1])
break
with open(prefix + '.loop.txt', 'r') as infile:
for line in infile:
pass
r = float(line.split()[0])
return r/m0
# code to run as script:
if __name__ == "__main__":
import sys, argparse, math
parser = argparse.ArgumentParser()
parser.add_argument("msmc", help="path/prefix for MSMC files to take as input")
parser.add_argument("--form", help="desired format of ms output", type=str, choices=['trees','snps','both'], default='both')
parser.add_argument("--chromL", help="Length of chromosome to simulate. Set to 0 to do single-locus sims.", type=int, default=10**6)
args = parser.parse_args()
MSNeT = []
with open(args.msmc + '.final.txt','r') as infile:
for line in infile:
# skip the header:
if not line.startswith('t'):
MSMCparams = [float(x) for x in line.split()]
# MSMCparams should be [index, mu t_left, mu t_right, 1/(2Ne(t) mu)]
# ms needs [t_left/(4Ne(0)), Ne(t)/Ne(0)]
if MSMCparams[0] == 0:
#the first line of data; only use it to normalize the rest (and the mutation and recombination rates)
norm = MSMCparams[-1] # = 1/(2Ne(0) mu)
else:
#check if there has been a change in Ne from the previous time step:
if MSNeT == [] or norm / MSMCparams[-1] != MSNeT[-1][-1]:
MSNeT.append([MSMCparams[1] * norm / 2.0, norm / MSMCparams[-1]])
outstring = ''
if args.form in ('trees','both'):
outstring += '-T '
if args.form in ('snps','both'):
outstring += '-t {} '.format(2 * args.chromL / norm)
if args.chromL:
rho = rho(args.msmc)
outstring += '-r {} {} -p {} '.format(2*args.chromL/norm*rho, args.chromL, math.ceil(math.log10(args.chromL)))
outstring += '-eN ' + ' -eN '.join(' '.join(str(x) for x in timestep) for timestep in MSNeT)
print(outstring)