forked from horovod/horovod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.travis.yml
157 lines (132 loc) · 7.76 KB
/
.travis.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Host image, language and interpreter versions for the build matrix. The
# docker service is required because every later phase drives a container
# through `docker exec`.
dist: trusty
language: python
python:
  - "2.7"
  - "3.5"
  - "3.6"
services:
  - docker
# Provisions the Ubuntu container that the install and script phases run in:
# selects the release, starts the container with the checkout mounted at
# /horovod, then installs system packages, Python, pip and PySpark inside it.
before_install:
  # Python 3.6 and nightly TensorFlow (which needs a newer glibc) run on
  # Ubuntu 18.04; every other combination runs on 16.04.
  - |
    if [[ ${TRAVIS_PYTHON_VERSION} == "3.6" || ${TF_PACKAGE} == "tf-nightly" ]]; then
      export UBUNTU=18.04
    else
      export UBUNTU=16.04
    fi
  - docker pull ubuntu:${UBUNTU}
  # Start a detached container that stays alive for an hour and keep the ID
  # that `docker run -d` prints. Capturing the ID directly avoids guessing it
  # from `docker ps` after a fixed sleep, and the `&&` makes a failed start
  # fail this step instead of leaving CONTAINER empty.
  - CONTAINER=$(docker run -d -v `pwd`:/horovod ubuntu:${UBUNTU} /bin/sh -c "sleep 3600") && export CONTAINER
  - docker exec ${CONTAINER} /bin/sh -c "apt-get update -qq"
  # install necessary network tools
  - docker exec ${CONTAINER} /bin/sh -c "apt-get install -y wget openssh-client git build-essential"
  # install OpenJDK 8 for PySpark (apt-get, like every other step; the `apt`
  # front end is not meant for scripted use)
  - docker exec ${CONTAINER} /bin/sh -c "apt-get install -y openjdk-8-jdk-headless"
  # install Python (the 3.6 job additionally installs python3-distutils) and
  # expose the chosen interpreter as /usr/bin/python
  - |
    if [[ ${TRAVIS_PYTHON_VERSION} == "3.6" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y python${TRAVIS_PYTHON_VERSION} python${TRAVIS_PYTHON_VERSION}-dev python3-distutils"
    else
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y python${TRAVIS_PYTHON_VERSION} python${TRAVIS_PYTHON_VERSION}-dev"
    fi
  - docker exec ${CONTAINER} /bin/sh -c "ln -s /usr/bin/python${TRAVIS_PYTHON_VERSION} /usr/bin/python"
  # bootstrap pip, then bring pip, setuptools and requests up to date
  - docker exec ${CONTAINER} /bin/sh -c "wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py"
  - docker exec ${CONTAINER} /bin/sh -c "pip install -U --force pip setuptools requests"
  # install the PySpark version selected by the env matrix
  - docker exec ${CONTAINER} /bin/sh -c "pip install pyspark==${PYSPARK}"
# Framework/MPI combinations under test. Each row sets the variables read by
# the other phases: TF_PACKAGE, KERAS_PACKAGE, PYTORCH_PACKAGE and
# MXNET_PACKAGE are passed to pip in `install`, MPI selects the MPI
# implementation and mpirun flags, and PYSPARK pins the PySpark version
# installed in `before_install`.
# NOTE: `matrix.exclude` repeats some of these rows; keep both copies
# identical when editing a row.
env:
  matrix:
    - TF_PACKAGE=tensorflow==1.1.0 KERAS_PACKAGE=keras==2.0.0 PYTORCH_PACKAGE=torch==0.4.0 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.1.2
    - TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2 PYTORCH_PACKAGE=torch==0.4.1 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.3.2
    - TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.4.0
    - TF_PACKAGE=tf-nightly KERAS_PACKAGE=git+https://github.com/keras-team/keras.git PYTORCH_PACKAGE=torch-nightly MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.4.0
    - TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=MPICH PYSPARK=2.4.0
# Trims the python x env build matrix. Each exclusion has to repeat an
# `env.matrix` row exactly; an entry whose env string matches no row removes
# nothing.
matrix:
  fast_finish: true
  exclude:
    # TensorFlow 1.6.0 row: skip Python 3.5 and 3.6. The 3.5 entry previously
    # said torch==0.4.0, which matched no env row (the row uses torch==0.4.1),
    # so that job was never actually excluded.
    - python: "3.5"
      env: TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2 PYTORCH_PACKAGE=torch==0.4.1 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.3.2
    - python: "3.6"
      env: TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2 PYTORCH_PACKAGE=torch==0.4.1 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.3.2
    # MPICH row: skip Python 3.5 and 3.6.
    - python: "3.5"
      env: TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=MPICH PYSPARK=2.4.0
    - python: "3.6"
      env: TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=MPICH PYSPARK=2.4.0
    # Nightly row: skip Python 3.5.
    - python: "3.5"
      env: TF_PACKAGE=tf-nightly KERAS_PACKAGE=git+https://github.com/keras-team/keras.git PYTORCH_PACKAGE=torch-nightly MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.4.0
# Installs, inside the container, the MPI implementation and the framework
# packages selected by the env matrix, then builds and installs Horovod from
# the mounted checkout.
install:
  # MPI: either the distribution's MPICH package or the prebuilt OpenMPI 3.0.0
  # tarball unpacked under /usr/local
  - |
    if [[ ${MPI} != "OpenMPI" ]]; then
      # MPICH as packaged by the container's Ubuntu release
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y mpich"
    else
      docker exec ${CONTAINER} /bin/sh -c "wget -O /tmp/openmpi-3.0.0-bin.tar.gz https://github.com/uber/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz"
      docker exec ${CONTAINER} /bin/sh -c "cd /usr/local && tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz && ldconfig"
    fi

  # TensorFlow
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${TF_PACKAGE}"

  # Keras plus the packages installed alongside it
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${KERAS_PACKAGE} h5py scipy pandas"

  # packages PyTorch depends on
  - docker exec ${CONTAINER} /bin/sh -c "pip install future typing"

  # PyTorch: for the nightly job, install torchvision first, remove the torch
  # package, then install torch_nightly from the nightly CPU wheel index;
  # otherwise install the pinned release together with torchvision
  - |
    if [[ ${PYTORCH_PACKAGE} == "torch-nightly" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "pip install torchvision"
      docker exec ${CONTAINER} /bin/sh -c "pip uninstall -y torch"
      docker exec ${CONTAINER} /bin/sh -c "pip install torch_nightly -v -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html"
    else
      docker exec ${CONTAINER} /bin/sh -c "pip install ${PYTORCH_PACKAGE} torchvision"
    fi

  # MXNet
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${MXNET_PACKAGE}"

  # Horovod: build an sdist from the checkout and install that tarball
  - docker exec ${CONTAINER} /bin/sh -c "cd /horovod && python setup.py sdist"
  - docker exec ${CONTAINER} /bin/sh -c "pip install -v /horovod/dist/horovod-*.tar.gz"
# Runs the unit tests and the bundled examples inside the container, each
# launched through ${MPIRUN} with two processes. The examples are first
# patched in place with sed so they finish quickly.
script:
  # mpirun command line for the selected MPI implementation; it is expanded by
  # the host shell into the `docker exec` commands below
  - |
    if [[ ${MPI} == "OpenMPI" ]]; then
      export MPIRUN="mpirun -allow-run-as-root -np 2 -H localhost:2 -bind-to none -map-by slot -mca mpi_abort_print_stack 1"
    else
      export MPIRUN="mpirun -np 2"
    fi

  # create ~/.keras up front to avoid a race condition
  - docker exec ${CONTAINER} /bin/sh -c "mkdir -p ~/.keras"

  # unit tests: one mpirun/pytest invocation per test_*.py file
  - docker exec ${CONTAINER} /bin/sh -c "pip install pytest && cd /horovod/test && (echo test_*.py | xargs -n 1 ${MPIRUN} pytest -v)"

  # TensorFlow MNIST example
  # tf 1.1.0 compatibility hack: import keras from tensorflow.contrib
  - |
    if [[ ${TF_PACKAGE} == "tensorflow==1.1.0" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/from tensorflow import keras/from tensorflow.contrib import keras/\" /horovod/examples/tensorflow_mnist.py"
    fi
  # shrink the run from 20000 steps to 100
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/last_step=20000/last_step=100/\" /horovod/examples/tensorflow_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/tensorflow_mnist.py"

  # TensorFlow Eager MNIST example
  # shrink the dataset from 20000 to 100; the example itself only runs for
  # tensorflow==1.12.0
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/dataset.take(20000/dataset.take(100/\" /horovod/examples/tensorflow_mnist_eager.py"
  - |
    if [[ ${TF_PACKAGE} == "tensorflow==1.12.0" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/tensorflow_mnist_eager.py"
    fi

  # Keras MNIST advanced example
  # download the Keras MNIST dataset before the MPI run
  - docker exec ${CONTAINER} /bin/sh -c "python -c \"from keras.datasets import mnist; mnist.load_data()\""
  # shrink the example: 12 epochs, one filter in the first Conv2D layer, and
  # the second Conv2D layer removed
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/epochs = .*/epochs = 12/\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/model.add(Conv2D(32, kernel_size=(3, 3),/model.add(Conv2D(1, kernel_size=(3, 3),/\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/model.add(Conv2D(64, (3, 3), activation='relu'))//\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/keras_mnist_advanced.py"

  # PyTorch MNIST example
  # shrink the example: remove both convolution lines from forward() and
  # resize fc1 and the view from 320 to 784 inputs
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/self.fc1 = nn.Linear(320, 50)/self.fc1 = nn.Linear(784, 50)/\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = F.relu(F.max_pool2d(self.conv1(x), 2))//\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))//\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = x.view(-1, 320)/x = x.view(-1, 784)/\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/pytorch_mnist.py"

  # MXNet MNIST example
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/mxnet_mnist.py"