-
Notifications
You must be signed in to change notification settings - Fork 0
/
MAGMA_automation.py
63 lines (41 loc) · 2.87 KB
/
MAGMA_automation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 28 15:15:23 2019
Automation of MAGMA pipeline
@author: williamreay
"""
import os
import sys
import argparse
from subprocess import Popen
import pdb
##Add command line flags for automation of MAGMA protocol, set argsDict variable to be a dictionary containing the command line arguments
parser = argparse.ArgumentParser('MAGMA gene level and geneset level association analyses')
parser.add_argument('--gwasfile', metavar=['filename'], type = str, help = 'GWAS summary statistics in the following format - SNP, CHR, BP, P')
parser.add_argument('--geneloc', metavar=['filename'], type = str, help = 'Gene location file - ID, BP1, BP2')
parser.add_argument('--phenotype', metavar='[str]', type = str, help = 'GWAS phenotype name')
parser.add_argument('--bfile', metavar= '{prefix}', type = str, help = 'Reference population plink binary fileset - .bed, .bim + .fam')
parser.add_argument('--samplesize', metavar='[str]', type = str, help = 'Sample size which GWAS performed on - cases + controls')
parser.add_argument('--genesets', metavar=['filename'], type = str, help = 'Input pathways in standard MSigDB format, see MAGMA manual for further clarification')
inputFlags = parser.parse_args()
print(inputFlags)
##Annotate GWAS SNPs to genes, set genic boundaries to encompass 5 kb upstream and 1.5 kb downstream
def geneAnnotation(gwasfile, geneloc, phenotype):
print("Annotate SNPs from GWAS file to genes with 5kb upstream and 1.5 kb downstream boundaries")
Popen("""./magma --annotate window=5,1.5 --snp-loc """ + gwasfile + """ --gene-loc """ + geneloc + """ --out """ + phenotype, shell=True).wait()
geneAnnotation(inputFlags.gwasfile, inputFlags.geneloc, inputFlags.phenotype)
##Format GWAS file for MAGMA gene level association
def pThresholds(gwasfile, phenotype):
Popen(""" awk ' { print $1, $4} ' """ + gwasfile + """ >> """ + phenotype + """allSNPs.loc""", shell=True).wait()
pThresholds(inputFlags.gwasfile, inputFlags.phenotype)
##Gene level association analysis using SNPwise mean model at each P value threshold
def geneAssociation(bfile, samplesize, phenotype):
print("MAGMA gene level association analysis at each p value threshold")
Popen(""" ./magma --bfile """ + bfile + """ --pval """ + phenotype + """allSNPs.loc N=""" + samplesize + """ --gene-annot """ + phenotype + """.genes.annot --out """ + phenotype + """_allSNPs""", shell=True).wait()
geneAssociation(inputFlags.bfile, inputFlags.samplesize, inputFlags.phenotype)
##Geneset association on MSigDB pathways
def geneSetAssociation(genesets, phenotype):
print("MAGMA geneset association analysis at each p value threshold")
Popen(""" ./magma --gene-results """ + phenotype + """_allSNPs.genes.raw --set-annot """ + genesets + """ --out """ + phenotype + """allSNPs""", shell=True).wait()
geneSetAssociation(inputFlags.genesets, inputFlags.phenotype)