diff --git a/optimum/neuron/modeling_decoder.py b/optimum/neuron/modeling_decoder.py index 1fe713bca..17072ff9d 100644 --- a/optimum/neuron/modeling_decoder.py +++ b/optimum/neuron/modeling_decoder.py @@ -123,14 +123,11 @@ def __init__( neuronx_model.load(compiled_dir) # Compile the Neuron model (if present compiled artifacts will be reloaded instead of compiled) - neuron_cc_flags = os.environ.get("NEURON_CC_FLAGS", "") - os.environ["NEURON_CC_FLAGS"] = neuron_cc_flags + " --model-type=transformer" checkpoint_id = neuron_config.get("checkpoint_id", None) # Only create a cache entry if the model comes from the hub cache_entry = None if checkpoint_id is None else ModelCacheEntry(checkpoint_id, config) with hub_neuronx_cache(entry=cache_entry): neuronx_model.to_neuron() - os.environ["NEURON_CC_FLAGS"] = neuron_cc_flags super().__init__(neuronx_model, config) diff --git a/optimum/neuron/version.py b/optimum/neuron/version.py index fabab3963..8d48f1965 100644 --- a/optimum/neuron/version.py +++ b/optimum/neuron/version.py @@ -13,3 +13,5 @@ # limitations under the License. __version__ = "0.0.18.dev0" + +__sdk_version__ = "2.16.1" diff --git a/setup.py b/setup.py index 77eea2506..64e1588c4 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ ], "neuronx": [ "wheel", - "neuronx-cc==2.12.54.0", + "neuronx-cc==2.12.68.0", "torch-neuronx==1.13.1.1.13.0", "transformers-neuronx==0.9.474", "torch==1.13.1.*", diff --git a/text-generation-inference/Dockerfile b/text-generation-inference/Dockerfile index 304665553..5b2036c7f 100644 --- a/text-generation-inference/Dockerfile +++ b/text-generation-inference/Dockerfile @@ -99,7 +99,7 @@ RUN apt-get update -y \ ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" RUN pip3 install \ - neuronx-cc==2.12.54.0 \ + neuronx-cc==2.12.68.0 \ torch-neuronx==1.13.1.1.13.0 \ transformers-neuronx==0.9.474 \ --extra-index-url=https://pip.repos.neuron.amazonaws.com