-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_requirements.nf
120 lines (92 loc) · 2.32 KB
/
prepare_requirements.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// ---------------------------------------------------------------------------
// Locations of the external resources fetched by this script.
// ---------------------------------------------------------------------------

// Relative path to the ESM extraction script (presumably inside the `esm`
// checkout produced by download_esm; not referenced by the code visible here).
esm_script_path = "esm/scripts/extract.py"

// Gene Ontology "go-basic" OBO file.
go_basic_url = "https://purl.obolibrary.org/obo/go/go-basic.obo"

// ESM git remote (SSH form). The previous value was an email-obfuscation
// placeholder containing a space, which `git clone` cannot use as a remote.
// NOTE(review): the SSH form needs an SSH key registered with GitHub; switch to
// https://github.com/facebookresearch/esm.git if anonymous cloning is required.
esm_git_url = "git@github.com:facebookresearch/esm.git"

// GO subset listing terms that must not be used for annotation (JSON).
gocheck_url = "https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.json"

// GO annotation file (gzipped GAF) covering all of UniProt.
goa_all_url = "https://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gaf.gz"

// UniProt Swiss-Prot sequences (gzipped FASTA).
uniprot_url = "https://ftp.uniprot.org/pub/databases/uniprot/knowledgebase/complete/uniprot_sprot.fasta.gz"

// ProtT5 per-protein embeddings for Swiss-Prot (HDF5).
prot_t5_embs_url = "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/embeddings/uniprot_sprot/per-protein.h5"
/*
 * Fetch the file at `url` with wget and expose `go-basic.obo` as the
 * `go_basic` output. The result is copied into the `databases` directory.
 *
 * input : url - address handed to wget
 * output: go_basic - path to go-basic.obo in the task directory
 */
process download_go {
    publishDir 'databases', mode: 'copy'

    input:
    val url

    output:
    path 'go-basic.obo', emit: go_basic

    script:
    """
    wget ${url}
    """
}
/*
 * Fetch the file at `url` with wget and expose
 * `gocheck_do_not_annotate.json` as the `gocheck_do_not_annotate` output.
 * The result is copied into the `databases` directory.
 *
 * input : url - address handed to wget
 * output: gocheck_do_not_annotate - path to the downloaded JSON file
 */
process download_gocheck_do_not_annotate {
    publishDir 'databases', mode: 'copy'

    input:
    val url

    output:
    path 'gocheck_do_not_annotate.json', emit: gocheck_do_not_annotate

    script:
    """
    wget ${url}
    """
}
/*
 * Clone the git repository given by `esm_git` and expose the resulting
 * `esm` directory as the `esm_dir` output. The checkout is copied into
 * the `libs/` directory.
 *
 * input : esm_git - git remote passed to `git clone`
 * output: esm_dir - path to the cloned `esm` directory
 */
process download_esm {
    publishDir 'libs/', mode: 'copy'

    input:
    val esm_git

    output:
    path 'esm', emit: esm_dir

    script:
    """
    git clone ${esm_git}
    """
}
/*
 * Fetch the file at `url` with wget and expose `uniprot_sprot.fasta.gz`
 * as the `uniprot_fasta` output. The result is copied into the
 * `databases` directory.
 *
 * input : url - address handed to wget
 * output: uniprot_fasta - path to uniprot_sprot.fasta.gz
 */
process download_uniprot {
    publishDir 'databases', mode: 'copy'

    input:
    val url

    output:
    path 'uniprot_sprot.fasta.gz', emit: uniprot_fasta

    script:
    """
    wget ${url}
    """
}
/*
 * Fetch the file at `url` with wget and expose `per-protein.h5` as the
 * `prot5_embs_h5` output. The result is copied into the `databases`
 * directory.
 *
 * input : url - address handed to wget
 * output: prot5_embs_h5 - path to per-protein.h5
 */
process download_prot5 {
    publishDir 'databases', mode: 'copy'

    input:
    val url

    output:
    path 'per-protein.h5', emit: prot5_embs_h5

    script:
    """
    wget ${url}
    """
}
// NOTE: a second, argument-less `download_uniprot` process used to be declared
// here. It hard-coded the same address that `uniprot_url` holds and clashed
// with the `download_uniprot(url)` process defined earlier in this file, so it
// has been removed; the workflow below uses the parameterized process only.

/*
 * Fetch the file at `url` with wget and expose `goa_uniprot_all.gaf.gz`
 * as the `go_annotation_raw` output. The result is copied into the
 * `databases` directory.
 *
 * input : url - address handed to wget
 * output: go_annotation_raw - path to goa_uniprot_all.gaf.gz
 */
process download_goa {
    publishDir "databases", mode: 'copy'

    input:
    val url

    output:
    path "goa_uniprot_all.gaf.gz", emit: go_annotation_raw

    script:
    """
    wget $url
    """
}

/*
 * Entry workflow: runs each download process once with its configured
 * address. A process may only be invoked once per workflow, so
 * `download_uniprot` is called a single time.
 */
workflow {
    download_uniprot(uniprot_url)
    // Left disabled, as in the original (reason not stated in this file).
    //download_goa(goa_all_url)
    download_go(go_basic_url)
    download_esm(esm_git_url)
    download_gocheck_do_not_annotate(gocheck_url)
    download_prot5(prot_t5_embs_url)
}