Patch summary for docker/Dockerfile.lumi (free text ahead of the diff header):
  * pin the base image to ubuntu:jammy instead of ubuntu:latest
  * drop the apt "awscli" package, add "unzip", and install AWS CLI v2 from the official zip
  * pass -y when installing google-cloud-cli
  * move the ROCm / amdgpu apt repositories from 5.6 to 6.0.2
  * move torch/torchvision/torchaudio to 2.3.1/0.18.1/2.3.1 from the rocm6.0 wheel index
  * install the [train] extra (with a stub olmo/version.py) instead of [all]
  * comment out the flash-attention build and move that section above the cleanup steps
  * the new file ends without a trailing newline

diff --git a/docker/Dockerfile.lumi b/docker/Dockerfile.lumi
index 2a330f146..5c99998ff 100644
--- a/docker/Dockerfile.lumi
+++ b/docker/Dockerfile.lumi
@@ -1,4 +1,4 @@
-FROM ubuntu:latest
+FROM ubuntu:jammy
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV LC_ALL=C.UTF-8
@@ -21,9 +21,9 @@ RUN apt-get install -y \
     vim \
     fish \
     wget \
+    unzip \
     parallel \
     s3cmd \
-    awscli \
     htop \
     wget \
     fish \
@@ -38,17 +38,22 @@ RUN apt-get install -y \
     gdb \
     apt-utils
 
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
+RUN unzip awscliv2.zip
+RUN ./aws/install
+RUN rm -r ./aws/
+
 # Install Google tools
 RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
 RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
 RUN apt-get update
-RUN apt-get install google-cloud-cli
+RUN apt-get install -y google-cloud-cli
 
 # Install ROCm
 RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
     wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
-    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/5.6/ubuntu jammy main" >> /etc/apt/sources.list.d/amdgpu.list && \
-    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/5.6 jammy main" >> /etc/apt/sources.list.d/rocm.list && \
+    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.0.2/ubuntu jammy main" >> /etc/apt/sources.list.d/amdgpu.list && \
+    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.0.2 jammy main" >> /etc/apt/sources.list.d/rocm.list && \
     echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
     apt-get update && \
     apt-get install -y rccl rccl-dev rocm-libs rocm-gdb rocm-dev rocm-developer-tools rocm-hip-runtime-dev rocm-utils rocm-hip-sdk && \
@@ -83,7 +88,7 @@ ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
 
 # Install torch
 RUN pip install --upgrade pip
-RUN pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/rocm5.6
+RUN pip install --no-cache-dir torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/rocm6.0
 
 # Install DeepSpeed
 RUN pip install --no-cache-dir mpi4py
@@ -95,22 +100,24 @@ RUN cd /opt && \
 
 # Install more dependencies
 COPY pyproject.toml .
-RUN mkdir olmo && touch olmo/__init__.py && \
-    pip install --no-cache-dir .[all] && \
+RUN mkdir olmo && \
+    touch olmo/__init__.py && \
+    echo 'VERSION = "0.1.0"' > olmo/version.py && \
+    pip install --no-cache-dir .[train] && \
     pip uninstall -y ai2-olmo && \
     rm -rf olmo/
 
 RUN pip install --no-cache-dir py-spy
 RUN pip install --no-cache-dir wandb --upgrade
 
+# # Install flash attention (for MI200 series!)
+# RUN cd /opt && \
+#     git clone --recursive https://github.com/ROCm/flash-attention.git && \
+#     cd flash-attention && \
+#     GPU_ARCHS="gfx90a" pip install .
+
 # Cleanup
 RUN apt-get autoremove
 RUN rm -rf /opt/mpich-3.1.4 /opt/aws-ofi-rccl /opt/DeepSpeed
 RUN apt-get clean
-RUN pip cache purge
-
-# Install flash attention (for MI200 series!)
-RUN cd /opt && \
-    git clone --recursive https://github.com/ROCm/flash-attention.git && \
-    cd flash-attention && \
-    GPU_ARCHS="gfx90a" pip install .
+RUN pip cache purge
\ No newline at end of file