From 681f2b81da81cbbb26d79356e60bc2a690380c16 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Tue, 24 Jan 2023 17:24:41 -0800 Subject: [PATCH] [Bugfix] Fix schedule and dockerfile (#17) --- docker/Dockerfile | 5 +++++ examples/gpt/schedule.py | 3 ++- examples/opt/schedule.py | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9883f67a..aecd8696 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -280,6 +280,11 @@ RUN cd $HOME/epoi && git fetch && git checkout b2e2e98 && pip3 install -e ".[dev RUN git clone https://github.com/huggingface/transformers.git $HOME/transformers RUN cd $HOME/transformers && git checkout 2bdd9fa && pip3 install -e ".[dev]" --no-deps +# FIXME Install official DeepSpeed +USER root +RUN pip3 install deepspeed==0.6.5 +USER deepspeed + # Fix dependencies RUN pip3 install huggingface-hub tokenizers numpy==1.23.4 datasets diff --git a/examples/gpt/schedule.py b/examples/gpt/schedule.py index b399f876..eba74ea9 100644 --- a/examples/gpt/schedule.py +++ b/examples/gpt/schedule.py @@ -233,7 +233,8 @@ def fwd_post_hook(_module, _input, output): sch[word_embed_name].sync(mode="fwd_post", sync_op_or_fn=fwd_post_hook) # Shard output embedding. - head_sch.shard("weight", axis=0) + if head_sch is not None: + head_sch.shard("weight", axis=0) def shard_qkv( diff --git a/examples/opt/schedule.py b/examples/opt/schedule.py index 1e2d417b..a92a651a 100644 --- a/examples/opt/schedule.py +++ b/examples/opt/schedule.py @@ -262,7 +262,8 @@ def fwd_post_hook(_module, _input, output): sch[word_embed_name].sync(mode="fwd_post", sync_op_or_fn=fwd_post_hook) # Shard output embedding. - head_sch.shard("weight", axis=0) + if head_sch is not None: + head_sch.shard("weight", axis=0) def shard_qkv(