-
Notifications
You must be signed in to change notification settings - Fork 0
/
env.mk
376 lines (282 loc) · 11.3 KB
/
env.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
# -*-makefile-*-
#
# settings of the environment
# - essential tools and their paths
# - system-specific settings
#
## Shell used to run recipes.
SHELL := /bin/bash
## Current working directory (only computed if PWD is not already set).
PWD ?= ${shell pwd}
## Directory of this makefile. Simply expanded, i.e. evaluated right here
## while this file is still the last entry of MAKEFILE_LIST.
MAKEDIR := $(dir $(lastword ${MAKEFILE_LIST}))
## Repository root = parent directory of MAKEDIR (overwrite if necessary).
## `?=` creates a recursively expanded variable, so the value is computed
## on every use. It is therefore derived from MAKEDIR and not from
## MAKEFILE_LIST, whose last entry changes with every further include.
REPOHOME ?= ${MAKEDIR}../
## Current date in ISO format (YYYY-MM-DD).
TODAY := $(shell date +%F)
# Job-specific defaults; overwrite them where necessary.
# HPC_EXTRA can carry additional SBATCH commands.
NR_GPUS      = 1
HPC_NODES    = 1
# HPC_DISK   = 500
HPC_QUEUE    = serial
HPC_GPUQUEUE = gpu
MEM         ?= 4g
CORES       ?= 1
WALLTIME    ?= 72
GPU          = v100
DEVICE       = cuda

# Commands that load the CPU / GPU environment (placeholders by default).
LOAD_CPU_ENV = echo "nothing to load"
LOAD_GPU_ENV = echo "nothing to load"

## Default SLURM option for allocating GPU resources.
HPC_GPU_ALLOCATION = --gres=gpu:$(GPU):$(NR_GPUS)

WORKHOME ?= $(PWD)/work

## Whatever has to be run to load the build environment for specific
## software; the Marian and extract-lex variants default to the generic one.
LOAD_BUILD_ENV            = echo "nothing to load"
LOAD_MARIAN_BUILD_ENV     = $(LOAD_BUILD_ENV)
LOAD_EXTRACTLEX_BUILD_ENV = $(LOAD_BUILD_ENV)
## Load the system-specific environment.
## The system is recognised by its DNS domain or its host name; if none of
## the cases matches, nothing is included and HPC_HOST is not set here.
ifeq ($(shell hostname -d 2>/dev/null),mahti.csc.fi)
  HPC_HOST = mahti
  include $(MAKEDIR)env/mahti.mk
else ifeq ($(shell hostname),dx6-ibs-p2)
  HPC_HOST = dx6
  include $(MAKEDIR)env/dx6.mk
else ifeq ($(shell hostname),dx7-nkiel-4gpu)
  HPC_HOST = dx7
  include $(MAKEDIR)env/dx7.mk
else ifeq ($(shell hostname --domain 2>/dev/null),bullx)
  HPC_HOST = puhti
  include $(MAKEDIR)env/puhti.mk
endif
## Default number of CPU cores and threads to be used.
CPU_CORES ?= $(CORES)
THREADS   ?= $(CPU_CORES)

## The same settings with HPC prefix
## (mostly kept for backwards compatibility).
HPC_TIME    ?= $(WALLTIME):00
HPC_CORES   ?= $(CPU_CORES)
HPC_THREADS ?= $(HPC_CORES)
HPC_MEM     ?= $(MEM)

## Number of parallel jobs in make (for slurm jobs):
## without a JOBS value both settings follow the thread counts,
## otherwise HPC_JOBS defaults to JOBS.
ifndef JOBS
  JOBS     ?= $(THREADS)
  HPC_JOBS ?= $(HPC_THREADS)
else
  HPC_JOBS ?= $(JOBS)
endif
SUBMIT_PREFIX ?= submit

## Fall back to /tmp if TMPDIR has no value.
ifndef TMPDIR
  TMPDIR := /tmp
endif

## Create a new temporary work directory unless one is given already
## and pass it on through the environment.
ifndef TMPWORKDIR
  TMPWORKDIR := $(shell mktemp -d)
endif
export TMPWORKDIR
## tools and their locations
##
## Most tools are looked up in the PATH first (`which`); the fallback is
## the copy that is expected below TOOLSDIR. All of them can be overwritten.
SCRIPTDIR ?= ${REPOHOME}scripts
TOOLSDIR ?= ${REPOHOME}tools
## ISO639 may consist of two words ('perl <script>') in the fallback case
ISO639 ?= ${shell which iso639 2>/dev/null || echo 'perl ${TOOLSDIR}/LanguageCodes/ISO-639-3/bin/iso639'}
PIGZ ?= ${shell which pigz 2>/dev/null || echo ${TOOLSDIR}/pigz/pigz}
TERASHUF ?= ${shell which terashuf 2>/dev/null || echo ${TOOLSDIR}/terashuf/terashuf}
JQ ?= ${shell which jq 2>/dev/null || echo ${TOOLSDIR}/jq/jq}
PROTOC ?= ${shell which protoc 2>/dev/null || echo ${TOOLSDIR}/protobuf/bin/protoc}
MARIAN ?= ${shell which marian 2>/dev/null || echo ${TOOLSDIR}/marian-dev/build/marian}
## the *_HOME variables are directories with a trailing slash (result of `dir`)
MARIAN_HOME ?= $(dir ${MARIAN})
SPM_HOME ?= ${dir ${MARIAN}}
FASTALIGN ?= ${shell which fast_align 2>/dev/null || echo ${TOOLSDIR}/fast_align/build/fast_align}
FASTALIGN_HOME ?= ${dir ${FASTALIGN}}
ATOOLS ?= ${FASTALIGN_HOME}atools
## EFLOMAL is defined exactly once: a second `?=` based on EFLOMAL_HOME
## could never take effect and would refer to itself via EFLOMAL_HOME
EFLOMAL ?= ${shell which eflomal 2>/dev/null || echo ${TOOLSDIR}/eflomal/eflomal}
EFLOMAL_HOME ?= ${dir ${EFLOMAL}}
WORDALIGN ?= ${EFLOMAL_HOME}align.py
EXTRACT_LEX ?= ${shell which extract_lex 2>/dev/null || echo ${TOOLSDIR}/extract-lex/build/extract_lex}
MOSESSCRIPTS ?= ${TOOLSDIR}/moses-scripts/scripts
TMX2MOSES ?= ${shell which tmx2moses 2>/dev/null || echo ${TOOLSDIR}/OpusTools-perl/scripts/convert/tmx2moses}
GET_ISO_CODE ?= ${ISO639} -m
## Marian-NMT binaries, all located in MARIAN_HOME
## (MARIAN_HOME is used as a prefix without an extra slash)
MARIAN_TRAIN   = $(MARIAN_HOME)marian
MARIAN_DECODER = $(MARIAN_HOME)marian-decoder
MARIAN_SCORER  = $(MARIAN_HOME)marian-scorer
MARIAN_VOCAB   = $(MARIAN_HOME)marian-vocab

## tokenizer directory of the Moses scripts
TOKENIZER = $(MOSESSCRIPTS)/tokenizer
##--------------------------------------------------------
## Tools for creating efficient student models:
##
## the browsermt branch of marian-nmt, see
## https://github.com/browsermt/marian-dev
##--------------------------------------------------------
BROWSERMT_HOME   ?= $(TOOLSDIR)/browsermt
BROWSERMT_TRAIN   = $(BROWSERMT_HOME)/marian-dev/build/marian
BROWSERMT_DECODE  = $(BROWSERMT_HOME)/marian-dev/build/marian-decoder
BROWSERMT_CONVERT = $(BROWSERMT_HOME)/marian-dev/build/marian-conv
## BPE (subword-nmt)
##
## With `subword-nmt` in the PATH its sub-commands are used; otherwise the
## python scripts in SUBWORD_HOME are run with python3.
SUBWORD_BPE  ?= $(shell which subword-nmt 2>/dev/null || echo $(TOOLSDIR)/subword-nmt/subword_nmt/subword_nmt.py)
SUBWORD_HOME ?= $(dir $(SUBWORD_BPE))
ifneq ($(shell which subword-nmt 2>/dev/null),)
  BPE_LEARN ?= $(SUBWORD_BPE) learn-bpe
  BPE_APPLY ?= $(SUBWORD_BPE) apply-bpe
else
  BPE_LEARN ?= python3 $(SUBWORD_HOME)/learn_bpe.py
  BPE_APPLY ?= python3 $(SUBWORD_HOME)/apply_bpe.py
endif
## SentencePiece binaries (SPM_HOME is used as a prefix without extra slash)
SPM_TRAIN  = $(SPM_HOME)spm_train
SPM_ENCODE = $(SPM_HOME)spm_encode

## Standard command-line tools (all expanded once, right here):
## - sorting uses TMPDIR for temporary files and THREADS parallel sorts
## - shuffling uses terashuf if available and random sorting otherwise
## - compression uses pigz if available and gzip otherwise
SORT    := sort -T $(TMPDIR) --parallel=$(THREADS)
UNIQ    := $(SORT) -u
SHUFFLE := $(shell which $(TERASHUF) 2>/dev/null || echo "$(SORT) --random-sort")
GZIP    := $(shell which $(PIGZ) 2>/dev/null || echo gzip)
GZCAT   := $(GZIP) -cd
ZCAT    := gzip -cd
WGET    := wget -T 1
## check that we have a GPU available
## TODO: this assumes that we have nvidia-smi on the system
##
## - no nvidia-smi found, or its output contains the word 'failed':
##   compile Marian without CUDA and load the CPU environment
## - otherwise: set GPU_AVAILABLE and load the GPU environment
##
## `grep -c` prints the number of matching lines, so any number of
## 'failed' lines (not just exactly one) counts as "no usable GPU".
NVIDIA_SMI := ${shell which nvidia-smi 2>/dev/null}
ifneq ($(wildcard ${NVIDIA_SMI}),)
  ifneq ($(strip ${shell nvidia-smi | grep -c failed}),0)
    MARIAN_BUILD_OPTIONS += -DCOMPILE_CUDA=off
    LOAD_ENV = ${LOAD_CPU_ENV}
  else
    GPU_AVAILABLE = 1
    LOAD_ENV = ${LOAD_GPU_ENV}
  endif
else
  MARIAN_BUILD_OPTIONS += -DCOMPILE_CUDA=off
  LOAD_ENV = ${LOAD_CPU_ENV}
endif
COMET_SCORE ?= comet-score
## install prerequisites
##
## TODO: add OpusFilter?
##
## PREREQ_TOOLS: tool binaries that `make install` builds
##               (only the last word of ISO639, which may be 'perl <script>')
## PREREQ_PERL:  Perl modules that need to be installed
PREREQ_TOOLS := $(lastword ${ISO639}) ${ATOOLS} ${PIGZ} ${TERASHUF} ${MARIAN} ${EFLOMAL} ${TMX2MOSES}
PREREQ_PERL := ISO::639::3 ISO::639::5 OPUS::Tools XML::Parser
## additional tools:
## - extract-lex for extracting short lists
## - browsermt_train for quantization
## - jq to extract text from cirrus-search dumps of wikipedia (for back-translation)
##
## install those with `make install-all`
EXTRA_TOOLS := ${EXTRACT_LEX} ${BROWSERMT_TRAIN} ${JQ}
## pip and cpan: prefer pip3 / cpanm if they can be found after loading
## the build environment (each variable is assigned only once here; an
## earlier lookup without the build environment was always overwritten)
PIP := ${shell ${LOAD_BUILD_ENV} >/dev/null 2>/dev/null && which pip3 2>/dev/null || echo pip}
CPAN := ${shell ${LOAD_BUILD_ENV} >/dev/null 2>/dev/null && which cpanm 2>/dev/null || echo cpan}
## setup local Perl environment
## better install local::lib and put this into your .bashrc:
##
## eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"
##
## The exported variables put ${HOME}/perl5 in front of the existing
## values and add the tool directories at the end of the PATH.
## (the values must not end in a stray '}', which would corrupt the
##  last entry of the colon-separated lists)
export PATH := ${HOME}/perl5/bin:${PATH}:${MARIAN_HOME}:${SPM_HOME}:${FASTALIGN_HOME}
export PERL5LIB := ${HOME}/perl5/lib/perl5:${PERL5LIB}
export PERL_LOCAL_LIB_ROOT := ${HOME}/perl5:${PERL_LOCAL_LIB_ROOT}
export PERL_MB_OPT := --install_base "${HOME}/perl5"
export PERL_MM_OPT := INSTALL_BASE=${HOME}/perl5
## Install all prerequisites (submodules, python packages, Perl modules
## and the tools listed in PREREQ_TOOLS).
##
## quick hack to fix a problem in marian-dev submodule fbgemm
## --> googletest changed to 'main' from 'master'
## TODO: remove this again once it is not needed anymore!
##
## Steps:
## 1. update all submodules (errors are ignored in this first attempt)
## 2. keep a backup of the fbgemm .gitmodules files and add a
##    'branch = main' line after each google/googletest entry
## 3. update the submodules again
## 4. install python requirements, Perl modules and the required tools
## 5. link the leaderboard scores unless `scores` exists already
##
## NOTE: the special target is `.PHONY` (with a leading dot); without the
## dot the line only declares an ordinary target called `PHONY`.
.PHONY: install install-prerequisites install-prereq install-requirements
install install-prerequisites install-prereq install-requirements:
	-git submodule update --init --recursive --remote
	cp tools/marian-dev/src/3rd_party/fbgemm/.gitmodules \
	   tools/marian-dev/src/3rd_party/fbgemm/.gitmodules.backup
	cat tools/marian-dev/src/3rd_party/fbgemm/.gitmodules.backup |\
	sed 's#google/googletest#google/googletest| branch = main#' | tr '|' "\n" | uniq \
	> tools/marian-dev/src/3rd_party/fbgemm/.gitmodules
	cp tools/browsermt/marian-dev/src/3rd_party/fbgemm/.gitmodules \
	   tools/browsermt/marian-dev/src/3rd_party/fbgemm/.gitmodules.backup
	cat tools/browsermt/marian-dev/src/3rd_party/fbgemm/.gitmodules.backup |\
	sed 's#google/googletest#google/googletest| branch = main#' | tr '|' "\n" | uniq \
	> tools/browsermt/marian-dev/src/3rd_party/fbgemm/.gitmodules
	git submodule update --init --recursive --remote
	${LOAD_BUILD_ENV} && ${PIP} install --user -r requirements.txt
	${LOAD_BUILD_ENV} && ${MAKE} install-perl-modules
	${LOAD_BUILD_ENV} && ${MAKE} ${PREREQ_TOOLS}
	if [ ! -e scores ]; then \
	  ln -s OPUS-MT-leaderboard/scores scores; \
	fi
## Install the prerequisites plus the additional tools.
## (declared with `.PHONY`; the name without the leading dot would only
##  create an ordinary target called `PHONY`)
.PHONY: install-all
install-all: install install-extra-tools
## Build all tools in PREREQ_TOOLS with the build environment loaded.
.PHONY: install-prereq-tools
install-prereq-tools:
	$(LOAD_BUILD_ENV) && $(MAKE) $(PREREQ_TOOLS)
## Install every Perl module in PREREQ_PERL that perl cannot load yet.
.PHONY: install-perl-modules
install-perl-modules:
	for module in $(PREREQ_PERL); do \
	  perl -e "use $$module;" 2> /dev/null || $(CPAN) -i $$module; \
	done
## Build the optional tools in EXTRA_TOOLS with the build environment loaded.
.PHONY: install-extra-tools
install-extra-tools:
	$(LOAD_BUILD_ENV) && $(MAKE) $(EXTRA_TOOLS)
## ISO-639 language code tools (LanguageCodes submodule).
## The recipes refer to the same ${TOOLSDIR}-based paths as the targets,
## so that the goal of the sub-make matches the rule below for any value
## of TOOLSDIR (and not only if it happens to be `tools`).
${TOOLSDIR}/LanguageCodes/ISO-639-3/bin/iso639:
	${MAKE} ${TOOLSDIR}/LanguageCodes/ISO-639-5/lib/ISO/639/5.pm

${TOOLSDIR}/LanguageCodes/ISO-639-5/lib/ISO/639/5.pm:
	${MAKE} -C ${TOOLSDIR}/LanguageCodes all
## fast_align tools: run cmake in the build directory and compile there
$(TOOLSDIR)/fast_align/build/atools:
	mkdir -p $(dir $@)
	cd $(dir $@) && cmake ..
	$(MAKE) -C $(dir $@)
## pigz: compile in its source directory
$(TOOLSDIR)/pigz/pigz:
	$(MAKE) -C $(dir $@)

## terashuf: compile in its source directory
## (cloning is not needed anymore - it's a submodule)
# mkdir -p ${TOOLSDIR}
# cd ${TOOLSDIR} && git clone https://github.com/alexandres/terashuf.git
$(TOOLSDIR)/terashuf/terashuf:
	$(MAKE) -C $(dir $@)
## jq: fetch its submodules, generate and run configure
## (with the builtin oniguruma) and compile
$(TOOLSDIR)/jq/jq:
	cd $(dir $@) && \
	git submodule update --init && \
	autoreconf -fi && \
	./configure --with-oniguruma=builtin
	$(MAKE) -C $(dir $@) all
## Notes for Mac users:
## - install protobuf: sudo port install protobuf3-cpp
## - install MKL (especially for cpu use):
## file:///opt/intel/documentation_2020/en/mkl/ps2020/get_started.htm
##
## TODO: do we still need to compile protobuf?
##
## Marian-NMT: configure with SentencePiece support plus
## MARIAN_BUILD_OPTIONS and compile with 8 parallel jobs
$(TOOLSDIR)/marian-dev/build/marian: # ${PROTOC}
	mkdir -p $(dir $@)
	cd $(dir $@) && $(LOAD_MARIAN_BUILD_ENV) && cmake -DUSE_SENTENCEPIECE=on $(MARIAN_BUILD_OPTIONS) ..
	$(LOAD_MARIAN_BUILD_ENV) && $(MAKE) -C $(dir $@) -j8
## browsermt branch of Marian-NMT: default cmake configuration,
## compiled with 8 parallel jobs
$(TOOLSDIR)/browsermt/marian-dev/build/marian: # ${PROTOC}
	mkdir -p $(dir $@)
	cd $(dir $@) && $(LOAD_MARIAN_BUILD_ENV) && cmake ..
	$(LOAD_MARIAN_BUILD_ENV) && $(MAKE) -C $(dir $@) -j8
## OBSOLETE?
##
## protobuf: clone (unless the checkout exists already), configure with
## ${TOOLSDIR}/protobuf as installation prefix, compile and install,
## which is what puts protoc into ${TOOLSDIR}/protobuf/bin.
## - the clone is guarded by the checkout directory itself; bin/ does not
##   exist before the installation step
## - the prefix is made absolute because TOOLSDIR may be a relative path
${TOOLSDIR}/protobuf/bin/protoc:
	mkdir -p ${TOOLSDIR}
	if [ ! -e ${TOOLSDIR}/protobuf ]; then \
	  cd ${TOOLSDIR} && git clone https://github.com/protocolbuffers/protobuf.git; \
	fi
	cd ${TOOLSDIR}/protobuf && git submodule update --init --recursive
	cd ${TOOLSDIR}/protobuf && ./autogen.sh
	cd ${TOOLSDIR}/protobuf && ./configure --prefix=$(abspath ${TOOLSDIR}/protobuf)
	${MAKE} -C ${TOOLSDIR}/protobuf
	${MAKE} -C ${TOOLSDIR}/protobuf install
## extract-lex: clone the repository unless it is there already,
## then configure with cmake and compile with 4 parallel jobs
$(TOOLSDIR)/extract-lex/build/extract_lex:
	mkdir -p $(TOOLSDIR)
	[ -e $(TOOLSDIR)/extract-lex ] || \
	  ( cd $(TOOLSDIR) && git clone https://github.com/marian-nmt/extract-lex )
	mkdir -p $(dir $@)
	cd $(dir $@) && $(LOAD_EXTRACTLEX_BUILD_ENV) && cmake ..
	$(LOAD_EXTRACTLEX_BUILD_ENV) && $(MAKE) -C $(dir $@) -j4
## for Mac users: use gcc to compile eflomal
##
## sudo port install gcc10
## gcc-mp-10 -Ofast -march=native -Wall --std=gnu99 -Wno-unused-function -g -fopenmp -c eflomal.c
## gcc-mp-10 -lm -lgomp -fopenmp eflomal.o -o eflomal
##
## sudo port install llvm-devel py-cython py-numpy
## sudo port select --set python python38
## sudo port select --set python3 python38
## sudo port select --set cython cython38
## cd tools/eflomal
## sudo env python3 setup.py install

## `make install-eflomal` depends on the eflomal binary, so the build
## rule below is triggered if the binary does not exist yet.
.PHONY: install-eflomal
install-eflomal: ${TOOLSDIR}/eflomal/eflomal

## compile eflomal and install its python package for the current user
${TOOLSDIR}/eflomal/eflomal:
	${MAKE} -C ${dir $@} all
	cd ${dir $@} && python3 setup.py install --user
# python3 setup.py install --install-dir ${HOME}/.local
## OpusTools-perl (provides tmx2moses): clone the repository unless the
## checkout exists already, then install it with the standard Perl
## procedure (Makefile.PL + make install).
## (`git clone` is required in front of the URL; a bare URL would be
##  executed as a command by the shell and fail)
${TOOLSDIR}/OpusTools-perl/scripts/convert/tmx2moses:
	mkdir -p ${TOOLSDIR}
	if [ ! -e ${TOOLSDIR}/OpusTools-perl ]; then \
	  cd ${TOOLSDIR} && git clone https://github.com/Helsinki-NLP/OpusTools-perl; \
	fi
	cd ${TOOLSDIR}/OpusTools-perl && perl Makefile.PL
	cd ${TOOLSDIR}/OpusTools-perl && ${MAKE} install