-
Notifications
You must be signed in to change notification settings - Fork 10
/
bigfat
executable file
·88 lines (77 loc) · 2.09 KB
/
bigfat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env bash
# Runs a BigFatLM task
# Does not string tasks together in any particular order
#
# Usage: bigfat <subtask> [args...]
set -e
set -o pipefail

scriptDir=$(dirname "$0")

if [[ $# == 0 ]]; then
  # No subtask given: run the jar with no arguments (presumably this prints
  # its usage text -- confirm against bigfat.jar) and stop. If java itself
  # fails, `set -e` exits right here with java's status. Otherwise exit
  # non-zero explicitly rather than falling through to `shift`, which fails
  # when there are no positional parameters.
  java -jar "$scriptDir/bigfat.jar"
  exit 1
fi
set -x

subtask=$1
shift
# All remaining arguments, joined by spaces into a single string
otherArgs="$*"

# Assume task is subtask unless otherwise
task=$subtask

# Request 400 tasks when a hadoop executable is found, a single task otherwise.
# `command -v` is the portable lookup; its output is discarded so the hadoop
# path is not written to this script's stdout.
if command -v hadoop >/dev/null; then
  haveHadoop=1
  numTasks=400
else
  haveHadoop=""
  numTasks=1
fi
# 1) Prepare the hadoop invocation line

# Prints the command prefix that launches bigfat.jar. The caller appends the
# task name and its arguments and runs the result through `eval`.
#   $1 - path to bigfat.jar; shell-quoted here so that a path containing
#        whitespace survives the later `eval`
#   $2 - number of map tasks and of reduce tasks to request from hadoop
#   $3 - non-empty when hadoop is available; empty selects a local java run
bigfatCommand() {
  local jar tasks=$2 useHadoop=$3
  printf -v jar '%q' "$1"

  # Check if we're running locally
  if [[ -z "$useHadoop" ]]; then
    printf '%s' "java -jar $jar"
    return
  fi

  # MAPREDUCE-478 - Separate map/reduce jvm opts
  # The single quotes around the -Xmx values are removed by `eval`.
  printf '%s' \
    "hadoop jar $jar" \
    " -Dmapred.job.map.memory.mb=3584" \
    " -Dmapred.job.queue.name=m45" \
    " -Dmapred.map.tasks=$tasks" \
    " -Dmapred.reduce.tasks=$tasks" \
    " -Dmapred.map.tasks.speculative.execution=True" \
    " -Dmapred.reduce.tasks.speculative.execution=True" \
    " -Dmapred.map.child.java.opts='-Xmx3000m'" \
    " -Dmapred.reduce.child.java.opts='-Xmx1500m'"
}

case "$subtask" in
  # applies to interpOrders and interpModels
  interp*)
    # Interpolation tasks use a quarter of the task count, but at least one,
    # and are told the reducer count explicitly.
    numTasks=$((numTasks / 4))
    if ((numTasks < 1)); then
      numTasks=1
    fi
    otherArgs="$otherArgs --numReducers $numTasks"
    ;;
esac

# Every subtask launches the jar that sits next to this script, so the
# command works regardless of the current working directory.
run=$(bigfatCommand "$scriptDir/bigfat.jar" "$numTasks" "$haveHadoop")
# 2) Run the command
# The filter tasks are handled by filter-lm.sh next to this script; every
# other task name is passed to the prepared $run command line.
case "$task" in
  filterForPP)
    # We can use a very strict phrase constraint
    # when filtering for perplexity
    # Each sentence is treated as one big "phrase"
    #
    # otherArgs should contain "inFile outFile"
    # otherArgs is one space-joined string, so it is left unquoted on purpose
    # to split back into separate arguments.
    # shellcheck disable=SC2086
    "$scriptDir/filter-lm.sh" $otherArgs phrase
    ;;
  filter)
    # shellcheck disable=SC2086
    "$scriptDir/filter-lm.sh" $otherArgs
    ;;
  *)
    # TODO: How do we pass args here...
    # otherArgs should contain "inFile outFile"
    # $run can carry single-quoted JVM options, so the line is re-parsed with
    # eval to strip those quotes.
    # NOTE(review): $task and $otherArgs come from the command line and are
    # re-parsed by eval as well, so shell metacharacters in them are
    # interpreted; arguments containing whitespace are split.
    eval "$run $task $otherArgs"
    ;;
esac