# -*-makefile-*-
PWD ?= ${shell pwd}
MAKEDIR := $(dir $(lastword ${MAKEFILE_LIST}))
REPOHOME ?= $(dir $(lastword ${MAKEFILE_LIST}))../
include ${MAKEDIR}env.mk
include ${MAKEDIR}slurm.mk
EXCLUDE_BENCHMARKS = flores101-devtest
# tatoeba-test-v2020-07-28 tatoeba-test-v2021-03-30
empty :=
SPACE := $(empty) $(empty)
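## with `empty' defined as nothing, the assignment above yields a single
## literal space; handy as a $(subst) argument where a bare space would be
## eaten by make, e.g. (illustrative values):
##   $(subst ${SPACE},\|,foo bar baz)  -->  foo\|bar\|baz
## (this is how EXCLUDE_BENCHMARKS is turned into a grep alternation below)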
## LEADERBOARD specifies the leaderboard category
## (OPUS-MT-leaderboard, External-MT-leaderboard, Contributed-MT-leaderboard)
LEADERBOARD ?= $(filter %-MT-leaderboard,$(subst /, ,${PWD}))
METRICS ?= bleu spbleu chrf chrf++ comet
METRIC ?= $(firstword ${METRICS})
## work directory (for the temporary models)
WORK_HOME ?= ${PWD}/work
MODEL ?= $(firstword ${MODELS})
WORK_DIR ?= ${WORK_HOME}/${MODEL}
## only translate from and to PIVOT (default = English)
PIVOTLANG ?= eng
## set a flag to use target language labels
## in multi-target models
ifneq (${words ${TRGLANGS}},1)
USE_TARGET_LABELS = 1
TARGET_LABELS ?= $(patsubst %,>>%<<,${TRGLANGS})
endif
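## example (illustrative language codes): with TRGLANGS = "deu fra"
## the default expands to TARGET_LABELS = ">>deu<< >>fra<<"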
## parameters for running Marian NMT
MARIAN_GPUS ?= 0
MARIAN_BEAM_SIZE ?= 4
MARIAN_MAX_LENGTH ?= 500
MARIAN_MINI_BATCH ?= 128
MARIAN_MAXI_BATCH ?= 256
# MARIAN_MINI_BATCH ?= 256
# MARIAN_MAXI_BATCH ?= 512
# MARIAN_MINI_BATCH = 512
# MARIAN_MAXI_BATCH = 1024
# MARIAN_MINI_BATCH = 768
# MARIAN_MAXI_BATCH = 2048
MARIAN_DECODER_WORKSPACE = 10000
ifeq ($(GPU_AVAILABLE),1)
MARIAN_DECODER_FLAGS = -b ${MARIAN_BEAM_SIZE} -n1 -d ${MARIAN_GPUS} \
--quiet-translation \
-w ${MARIAN_DECODER_WORKSPACE} \
--mini-batch ${MARIAN_MINI_BATCH} \
--maxi-batch ${MARIAN_MAXI_BATCH} --maxi-batch-sort src \
--max-length ${MARIAN_MAX_LENGTH} --max-length-crop
# --fp16
else
MARIAN_DECODER_FLAGS = -b ${MARIAN_BEAM_SIZE} -n1 --cpu-threads ${HPC_CORES} \
--quiet-translation \
--mini-batch ${HPC_CORES} \
--maxi-batch 100 --maxi-batch-sort src \
--max-length ${MARIAN_MAX_LENGTH} --max-length-crop
endif
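## with the defaults above, the GPU branch roughly amounts to calling
## marian-decoder with:
##   -b 4 -n1 -d 0 --quiet-translation -w 10000 \
##   --mini-batch 128 --maxi-batch 256 --maxi-batch-sort src \
##   --max-length 500 --max-length-crop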
GPUJOB_HPC_MEM = 20g
TIME := $(shell which time || echo "time")
FIND_TRANSLATIONS := ${MAKEDIR}tools/find-missing-translations.pl
MERGE_TRANSLATIONS := ${MAKEDIR}tools/merge-with-missing-translations.pl
MONITOR := ${MAKEDIR}tools/monitor
## directory with all test sets (submodule OPUS-MT-testsets)
OPUSMT_TESTSETS := ${REPOHOME}OPUS-MT-testsets
TESTSET_HOME := ${OPUSMT_TESTSETS}/testsets
TESTSET_INDEX := ${OPUSMT_TESTSETS}/index.txt
## model directory (for test results)
## model score file and zipfile with evaluation results
SCORE_HOME ?= ${REPOHOME}scores
MODEL_HOME ?= ${REPOHOME}models
MODEL_DIR := ${MODEL_HOME}/${MODEL}
MODEL_EVALZIP := ${MODEL_DIR}.zip
MODEL_EVALLOGZIP := ${MODEL_DIR}.log.zip
MODEL_EVALALLZIP := ${MODEL_DIR}.eval.zip
MODEL_TESTSETS := ${MODEL_DIR}.testsets.tsv
LEADERBOARD_DIR = ${REPOHOME}scores
SCORE_DB := ${LEADERBOARD_DIR}/${METRIC}_scores.db
SCORE_CSV := ${LEADERBOARD_DIR}/${METRIC}_scores.csv
SCORE_DBS := $(foreach m,${METRICS},${LEADERBOARD_DIR}/${m}_scores.db)
## convenient function to reverse a list
reverse = $(if $(wordlist 2,2,$(1)),$(call reverse,$(wordlist 2,$(words $(1)),$(1))) $(firstword $(1)),$(1))
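## works by recursion: reverse the tail, then append the first word
## example: $(call reverse,a b c d)  -->  d c b a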
## MODEL_EVAL_URL: location of the storage space for the evaluation output files
STORAGE_BUCKET := ${LEADERBOARD}
MODEL_STORAGE ?= https://object.pouta.csc.fi/${STORAGE_BUCKET}
MODEL_EVAL_URL := ${MODEL_STORAGE}/${MODEL}.eval.zip
LEADERBOARD_GITURL = https://raw.githubusercontent.com/Helsinki-NLP/${LEADERBOARD}/master
# MODELSCORE_STORAGE = ${LEADERBOARD_GITURL}/models/$(patsubst $(MODEL_HOME)%,%,${MODEL_DIR})
# MODELSCORE_STORAGE = ${LEADERBOARD_GITURL}/models/$(notdir ${MODEL_HOME})
MODELSCORE_STORAGE = ${LEADERBOARD_GITURL}/models
## score files with all evaluation results
## - combined BLEU and chrF scores (MODEL_SCORES)
## - scores for the currently selected metric (MODEL_METRIC_SCORE)
## - one score file per metric (MODEL_METRIC_SCORES)
## - all score files together (MODEL_EVAL_SCORES)
MODEL_SCORES := ${MODEL_DIR}.scores.txt
MODEL_METRIC_SCORE := ${MODEL_DIR}.${METRIC}-scores.txt
MODEL_METRIC_SCORES := $(patsubst %,${MODEL_DIR}.%-scores.txt,${METRICS})
MODEL_EVAL_SCORES := ${MODEL_SCORES} ${MODEL_METRIC_SCORES}
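## example (hypothetical model name): MODEL = deu-eng/opus-2021-02-22 gives
##   MODEL_SCORES = ${MODEL_HOME}/deu-eng/opus-2021-02-22.scores.txt
## plus one <metric>-scores.txt file per entry in METRICS
## (bleu, spbleu, chrf, chrf++, comet)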
#-------------------------------------------------
# all language pairs that the model supports
# find all test sets that we need to consider
#-------------------------------------------------
## if MODEL_LANGPAIRS is not set then simply combine all SRC_LANGS with all TRG_LANGS
ifndef MODEL_LANGPAIRS
MODEL_LANGPAIRS := ${shell for s in ${SRC_LANGS}; do \
for t in ${TRG_LANGS}; do echo "$$s-$$t"; done done}
endif
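## example (illustrative language codes): SRC_LANGS = "deu eng" and
## TRG_LANGS = "fin" give MODEL_LANGPAIRS = "deu-fin eng-fin"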
#-------------------------------------------------
# new structure of OPUS-MT-testsets (check index files)
#-------------------------------------------------
TESTSET_FILES := ${OPUSMT_TESTSETS}/testsets.tsv
LANGPAIR_TO_TESTSETS := ${OPUSMT_TESTSETS}/langpair2benchmark.tsv
TESTSETS_TO_LANGPAIR := ${OPUSMT_TESTSETS}/benchmark2langpair.tsv
ifdef LANGPAIRDIR
LANGPAIR = $(lastword $(subst /, ,${LANGPAIRDIR}))
endif
## NOTE: filtering becomes really slow if
## both ALL_LANGPAIRS and MODEL_LANGPAIRS are large lists!
ALL_LANGPAIRS := $(shell cut -f1 ${LANGPAIR_TO_TESTSETS})
LANGPAIRS := ${sort $(filter ${MODEL_LANGPAIRS},${ALL_LANGPAIRS})}
LANGPAIR ?= ${firstword ${LANGPAIRS}}
LANGPAIRSTR := ${LANGPAIR}
SRC := ${firstword ${subst -, ,${LANGPAIR}}}
TRG := ${lastword ${subst -, ,${LANGPAIR}}}
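## example: LANGPAIR = deu-eng --> SRC = deu, TRG = eng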
# get all test sets available for this language pair
# - all test sets listed in the index
# - all test sets found in the testset subdirectory
TESTSET_DIR := ${TESTSET_HOME}/${LANGPAIR}
TESTSETS := $(sort $(shell grep '^${LANGPAIR} ' ${LANGPAIR_TO_TESTSETS} | cut -f2) \
${notdir ${basename ${wildcard ${TESTSET_DIR}/*.${SRC}}}})
TESTSET ?= $(firstword ${TESTSETS})
TESTSET_SRC := $(patsubst %,${OPUSMT_TESTSETS}/%,\
$(shell grep '^${SRC} ${TRG} ${TESTSET} ' ${TESTSET_FILES} | cut -f7))
TESTSET_REFS := $(patsubst %,${OPUSMT_TESTSETS}/%,\
$(shell grep '^${SRC} ${TRG} ${TESTSET} ' ${TESTSET_FILES} | cut -f8-))
TESTSET_TRG := $(firstword ${TESTSET_REFS})
TESTSET_DOMAINS := $(patsubst %,${OPUSMT_TESTSETS}/%,\
$(shell grep '^${SRC} ${TRG} ${TESTSET} ' ${TESTSET_FILES} | cut -f4))
TESTSET_LABELS := $(patsubst %,${OPUSMT_TESTSETS}/%,\
$(shell grep '^${SRC} ${TRG} ${TESTSET} ' ${TESTSET_FILES} | cut -f6))
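## fall back to plain files in the per-language-pair test set directory
## if the index does not list files for this test set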
ifeq ($(wildcard ${TESTSET_SRC}),)
TESTSET_SRC := ${TESTSET_DIR}/${TESTSET}.${SRC}
endif
ifeq ($(wildcard ${TESTSET_TRG}),)
TESTSET_TRG := ${TESTSET_DIR}/${TESTSET}.${TRG}
TESTSET_REFS := ${TESTSET_TRG}
ifneq ($(wildcard ${TESTSET_TRG}.labels),)
TESTSET_LABELS := ${TESTSET_TRG}.labels
endif
endif
## get all available benchmarks for the current model
## TODO: is this super expensive for highly multilingual models?
## TODO: should we also check per metric what is missing?
## --> yes, computing this on the fly does not scale!
## the commented-out assignment below would extract all available
## benchmarks for all language pairs supported by the given model
## --> but that does not scale well for highly multilingual models
## --> instead, do it only once and store the list in a file
#
# AVAILABLE_BENCHMARKS := $(sort \
# $(foreach langpair,${LANGPAIRS},\
# $(patsubst %,${langpair}/%,\
# $(shell grep '^${langpair} ' ${LANGPAIR_TO_TESTSETS} | cut -f2))))
## store available benchmarks for this model in a file
## --> problem: this will be outdated if new benchmarks appear!
ifneq (${MODEL},)
ifneq ($(wildcard $(dir ${MODEL_DIR})),)
ifeq ($(wildcard ${MODEL_TESTSETS}),)
MAKE_BENCHMARK_FILE := \
$(foreach lp,${LANGPAIRS},\
$(shell mkdir -p $(dir ${MODEL_TESTSETS}) && \
grep '^${lp} ' ${LANGPAIR_TO_TESTSETS} | \
cut -f2 | tr ' ' "\n" | \
sed 's|^|${lp}/|' | \
grep -v "/\($(subst ${SPACE},\|,${EXCLUDE_BENCHMARKS})\)$$" >> ${MODEL_TESTSETS}))
endif
endif
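## MODEL_TESTSETS now lists one benchmark per line in the form
## <langpair>/<testset>, e.g. (illustrative) deu-eng/tatoeba-test-v2021-08-07,
## with the benchmarks in EXCLUDE_BENCHMARKS filtered out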
AVAILABLE_BENCHMARKS := $(sort $(shell if [ -e ${MODEL_TESTSETS} ]; then cut -f1 ${MODEL_TESTSETS}; fi))
TESTED_BENCHMARKS := $(sort $(shell if [ -e ${MODEL_METRIC_SCORE} ]; then cut -f1,2 ${MODEL_METRIC_SCORE} | tr "\t" '/'; fi))
MISSING_BENCHMARKS := $(filter-out ${TESTED_BENCHMARKS},${AVAILABLE_BENCHMARKS})
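## example (illustrative benchmark names):
##   AVAILABLE_BENCHMARKS = deu-eng/ntrex128 deu-eng/tatoeba-test-v2021-08-07
##   TESTED_BENCHMARKS    = deu-eng/ntrex128
##   --> MISSING_BENCHMARKS = deu-eng/tatoeba-test-v2021-08-07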
SYSTEM_INPUT := ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.input
SYSTEM_OUTPUT := ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.output
TRANSLATED_BENCHMARK := ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.compare
EVALUATED_BENCHMARK := ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.eval
endif
.INTERMEDIATE: ${SYSTEM_INPUT}
TRANSLATED_BENCHMARKS := $(patsubst %,${MODEL_DIR}/%.${LANGPAIR}.compare,${TESTSETS})
EVALUATED_BENCHMARKS := $(patsubst %,${MODEL_DIR}/%.${LANGPAIR}.eval,${TESTSETS})
# EVALUATED_BENCHMARKS := $(patsubst %,${MODEL_DIR}/%.${LANGPAIR}.evalfiles.zip,${TESTSETS})
BENCHMARK_SCORE_FILES := $(foreach m,${METRICS},${MODEL_DIR}/${TESTSET}.${LANGPAIR}.${m})
## never treat these files as intermediates
## (so make does not delete them after use in implicit rule chains)
.NOTINTERMEDIATE: ${TRANSLATED_BENCHMARKS} ${EVALUATED_BENCHMARKS} ${BENCHMARK_SCORE_FILES}