From 20dd1c82378067ade1d493254eaecacdf6aacc45 Mon Sep 17 00:00:00 2001 From: JingyaHuang Date: Thu, 13 Feb 2025 23:41:29 +0000 Subject: [PATCH] NeuronModel API modification done --- optimum/commands/export/neuronx.py | 2 +- optimum/exporters/neuron/__main__.py | 6 +- optimum/exporters/neuron/base.py | 6 +- optimum/exporters/neuron/convert.py | 6 - .../neuron/model_configs/traced_configs.py | 2 +- optimum/exporters/neuron/model_wrappers.py | 8 +- optimum/exporters/neuron/utils.py | 8 +- optimum/neuron/modeling_diffusion.py | 40 +- optimum/neuron/modeling_traced.py | 2 +- optimum/neuron/utils/__init__.py | 10 +- optimum/neuron/utils/argument_utils.py | 42 +- optimum/neuron/utils/input_generators.py | 4 +- tests/exporters/test_export.py | 418 +++++++++--------- 13 files changed, 292 insertions(+), 262 deletions(-) diff --git a/optimum/commands/export/neuronx.py b/optimum/commands/export/neuronx.py index 13b9d4997..abfb97933 100644 --- a/optimum/commands/export/neuronx.py +++ b/optimum/commands/export/neuronx.py @@ -224,7 +224,7 @@ def parse_args_neuronx(parser: "ArgumentParser"): type=float, help="Scaling factors for the IP-Adapters.", ) - + # Static Input Shapes input_group = parser.add_argument_group("Input shapes") doc_input = "that the Neuronx-cc compiler exported model will be able to take as input." diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py index 312c6a68a..ab3f0c123 100644 --- a/optimum/exporters/neuron/__main__.py +++ b/optimum/exporters/neuron/__main__.py @@ -36,10 +36,10 @@ DIFFUSION_MODEL_VAE_ENCODER_NAME, ENCODER_NAME, NEURON_FILE_NAME, - LoRAAdapterArguments, - IPAdapterArguments, ImageEncoderArguments, InputShapesArguments, + IPAdapterArguments, + LoRAAdapterArguments, is_neuron_available, is_neuronx_available, is_transformers_neuronx_available, @@ -298,7 +298,7 @@ def infer_stable_diffusion_shapes_from_diffusers( hidden_size=model.image_encoder.vision_model.embeddings.position_embedding.weight.shape[1], projection_dim=getattr(model.image_encoder.config, "projection_dim", None), ) - + # Format with `InputShapesArguments` for sub_model_name in input_shapes.keys(): input_shapes[sub_model_name] = InputShapesArguments(**input_shapes[sub_model_name]) diff --git a/optimum/exporters/neuron/base.py b/optimum/exporters/neuron/base.py index 4ac837022..a0794c70f 100644 --- a/optimum/exporters/neuron/base.py +++ b/optimum/exporters/neuron/base.py @@ -23,7 +23,7 @@ from optimum.utils import logging from ...exporters.base import ExportConfig -from ...neuron.utils import InputShapesArguments, ImageEncoderArguments, is_neuron_available +from ...neuron.utils import ImageEncoderArguments, InputShapesArguments, is_neuron_available if TYPE_CHECKING: @@ -192,8 +192,8 @@ def __init__( "vae_scale_factor": input_shapes.vae_scale_factor, "encoder_hidden_size": input_shapes.encoder_hidden_size, "image_encoder_shapes": ImageEncoderArguments( - sequence_length=getattr(input_shapes.image_encoder_shapes, "sequence_length", None), - hidden_size=getattr(input_shapes.image_encoder_shapes, "hidden_size", None), + sequence_length=getattr(input_shapes.image_encoder_shapes, "sequence_length", None), + hidden_size=getattr(input_shapes.image_encoder_shapes, "hidden_size", None), projection_dim=getattr(input_shapes.image_encoder_shapes, "projection_dim", None), ), } diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py index 788a40f01..e46b671c4 100644 --- a/optimum/exporters/neuron/convert.py +++ b/optimum/exporters/neuron/convert.py @@ -348,12 +348,6 @@ def export_models( failed_models = [] total_compilation_time = 0 compile_configs = {} - models_and_neuron_configs.pop("text_encoder") - # models_and_neuron_configs.pop("text_encoder_2") - models_and_neuron_configs.pop("unet") - models_and_neuron_configs.pop("vae_encoder") - models_and_neuron_configs.pop("vae_decoder") - # models_and_neuron_configs.pop("image_encoder") for i, model_name in enumerate(models_and_neuron_configs.keys()): logger.info(f"***** Compiling {model_name} *****") submodel, sub_neuron_config = models_and_neuron_configs[model_name] diff --git a/optimum/exporters/neuron/model_configs/traced_configs.py b/optimum/exporters/neuron/model_configs/traced_configs.py index 5b56ad5d3..5d785346d 100644 --- a/optimum/exporters/neuron/model_configs/traced_configs.py +++ b/optimum/exporters/neuron/model_configs/traced_configs.py @@ -632,7 +632,7 @@ def inputs(self) -> List[str]: if self.image_encoder_output_hidden_states: common_inputs += ["image_enc_hidden_states"] else: - common_inputs += ["image_embeds"] + common_inputs += ["image_embeds"] return common_inputs diff --git a/optimum/exporters/neuron/model_wrappers.py b/optimum/exporters/neuron/model_wrappers.py index 757562041..ba8ed97cc 100644 --- a/optimum/exporters/neuron/model_wrappers.py +++ b/optimum/exporters/neuron/model_wrappers.py @@ -571,8 +571,8 @@ def forward(self, input_ids, attention_mask): class CLIPVisionModelNeuronWrapper(torch.nn.Module): def __init__( - self, - model, + self, + model, input_names: List[str], output_hidden_states: bool = True, ): @@ -585,9 +585,9 @@ def forward(self, pixel_values): vision_outputs = self.model.vision_model(pixel_values=pixel_values, output_hidden_states=self.output_hidden_states) pooled_output = vision_outputs[1] image_embeds = self.model.visual_projection(pooled_output) - + outputs = (image_embeds, vision_outputs.last_hidden_state) - + if self.output_hidden_states: outputs += (vision_outputs.hidden_states, ) return outputs diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py index 69c0f96c4..301df57eb 100644 --- a/optimum/exporters/neuron/utils.py +++ b/optimum/exporters/neuron/utils.py @@ -18,12 +18,11 @@ import os from collections import OrderedDict from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, Any +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union import torch from ...neuron.utils import ( - LoRAAdapterArguments, DECODER_NAME, DIFFUSION_MODEL_CONTROLNET_NAME, DIFFUSION_MODEL_TEXT_ENCODER_2_NAME, @@ -33,6 +32,7 @@ DIFFUSION_MODEL_VAE_DECODER_NAME, DIFFUSION_MODEL_VAE_ENCODER_NAME, ENCODER_NAME, + LoRAAdapterArguments, get_attention_scores_sd, get_attention_scores_sdxl, neuron_scaled_dot_product_attention, @@ -235,7 +235,7 @@ def get_diffusion_models_for_export( unet_neuron_config.with_ip_adapter = getattr(unet.config, "encoder_hid_dim_type", None) == "ip_image_proj" models_for_export[DIFFUSION_MODEL_UNET_NAME] = (unet, unet_neuron_config) - + # Diffusion Transformer transformer = None @@ -340,7 +340,7 @@ def get_diffusion_models_for_export( input_shapes=image_encoder_input_shapes, ) models_for_export["image_encoder"] = (image_encoder, image_encoder_neuron_config) - models_for_export[DIFFUSION_MODEL_UNET_NAME][1].image_encoder_output_hidden_states = output_hidden_states + models_for_export[DIFFUSION_MODEL_UNET_NAME][1].image_encoder_output_hidden_states = output_hidden_states return models_for_export diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py index 66c169b6c..120741c8b 100644 --- a/optimum/neuron/modeling_diffusion.py +++ b/optimum/neuron/modeling_diffusion.py @@ -22,6 +22,7 @@ import shutil from abc import abstractmethod from collections import OrderedDict +from dataclasses import asdict from pathlib import Path from tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union @@ -52,6 +53,7 @@ DIFFUSION_MODEL_VAE_ENCODER_NAME, NEURON_FILE_NAME, DiffusersPretrainedConfig, + NeuronArgumentParser, check_if_weights_replacable, is_neuronx_available, replace_weights, @@ -877,16 +879,8 @@ def _export( dynamic_batch_size: bool = False, output_hidden_states: bool = False, data_parallel_mode: Optional[Literal["none", "unet", "transformer", "all"]] = None, - lora_model_ids: Optional[Union[str, List[str]]] = None, - lora_weight_names: Optional[Union[str, List[str]]] = None, - lora_adapter_names: Optional[Union[str, List[str]]] = None, - lora_scales: Optional[Union[float, List[float]]] = None, controlnet_ids: Optional[Union[str, List[str]]] = None, - ip_adapter_ids: Optional[Union[str, List[str]]] = None, - ip_adapter_subfolders: Optional[Union[str, List[str]]] = None, - ip_adapter_weight_names: Optional[Union[str, List[str]]] = None, - ip_adapter_scales: Optional[Union[float, List[float]]] = None, - **kwargs_shapes, + **kwargs, ) -> "NeuronDiffusionPipelineBase": """ Args: @@ -967,9 +961,13 @@ def _export( The name of the weight file to load. If a list is passed, it should have the same length as `ip_adapter_subfolders`. ip_adapter_scales (`Optional[Union[float, List[float]]]`, defaults to `None`): Scaling factors for the IP-Adapters. - kwargs_shapes (`Dict[str, int]`): - Shapes to use during inference. This argument allows to override the default shapes used during the export. """ + # Parse kwargs to their dataclass + parser = NeuronArgumentParser(**kwargs) + lora_args = parser.lora_args + ip_adapter_args = parser.ip_adapter_args + kwargs_shapes = asdict(parser.input_shapes) + if task is None: if cls.task is not None: task = cls.task @@ -1024,17 +1022,11 @@ def _export( local_files_only=local_files_only, token=token, submodels=submodels, + lora_args=lora_args, + ip_adapter_args=ip_adapter_args, output_hidden_states=output_hidden_states, - lora_model_ids=lora_model_ids, - lora_weight_names=lora_weight_names, - lora_adapter_names=lora_adapter_names, - lora_scales=lora_scales, torch_dtype=torch_dtype, controlnet_ids=controlnet_ids, - ip_adapter_ids=ip_adapter_ids, - ip_adapter_subfolders=ip_adapter_subfolders, - ip_adapter_weight_names=ip_adapter_weight_names, - ip_adapter_scales=ip_adapter_scales, **input_shapes_copy, ) @@ -1090,6 +1082,8 @@ def _export( model_name_or_path=model_id, output=save_dir_path, compiler_kwargs=compiler_kwargs, + lora_args=lora_args, + ip_adapter_args=ip_adapter_args, torch_dtype=torch_dtype, task=task, dynamic_batch_size=dynamic_batch_size, @@ -1107,15 +1101,7 @@ def _export( do_validation=False, submodels={"unet": unet_id}, output_hidden_states=output_hidden_states, - lora_model_ids=lora_model_ids, - lora_weight_names=lora_weight_names, - lora_adapter_names=lora_adapter_names, - lora_scales=lora_scales, controlnet_ids=controlnet_ids, - ip_adapter_ids=ip_adapter_ids, - ip_adapter_subfolders=ip_adapter_subfolders, - ip_adapter_weight_names=ip_adapter_weight_names, - ip_adapter_scales=ip_adapter_scales, library_name=cls.library_name, **input_shapes, ) diff --git a/optimum/neuron/modeling_traced.py b/optimum/neuron/modeling_traced.py index 1e499128c..0673cff99 100644 --- a/optimum/neuron/modeling_traced.py +++ b/optimum/neuron/modeling_traced.py @@ -31,8 +31,8 @@ from ..exporters.tasks import TasksManager from .modeling_base import NeuronModel from .utils import ( - InputShapesArguments, NEURON_FILE_NAME, + InputShapesArguments, check_if_weights_replacable, is_neuron_available, replace_weights, diff --git a/optimum/neuron/utils/__init__.py b/optimum/neuron/utils/__init__.py index bb96a5e85..09ca437bd 100644 --- a/optimum/neuron/utils/__init__.py +++ b/optimum/neuron/utils/__init__.py @@ -24,7 +24,8 @@ "IPAdapterArguments", "ImageEncoderArguments", "InputShapesArguments", - "convert_neuronx_compiler_args_to_neuron", + "NeuronArgumentParser", + "convert_neuronx_compiler_args_to_neuron", "store_compilation_config" ], "constant": [ @@ -91,11 +92,12 @@ if TYPE_CHECKING: from .argument_utils import ( - LoRAAdapterArguments, - IPAdapterArguments, ImageEncoderArguments, InputShapesArguments, - convert_neuronx_compiler_args_to_neuron, + IPAdapterArguments, + LoRAAdapterArguments, + NeuronArgumentParser, + convert_neuronx_compiler_args_to_neuron, store_compilation_config, ) from .constant import ( diff --git a/optimum/neuron/utils/argument_utils.py b/optimum/neuron/utils/argument_utils.py index 262980ac1..e42b13873 100644 --- a/optimum/neuron/utils/argument_utils.py +++ b/optimum/neuron/utils/argument_utils.py @@ -15,7 +15,7 @@ """Utilities related to CLI arguments.""" import os -from dataclasses import dataclass, is_dataclass, asdict +from dataclasses import asdict, dataclass, fields, is_dataclass from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union from ...utils import logging @@ -36,7 +36,7 @@ class LoRAAdapterArguments: weight_names: Optional[Union[str, List[str]]] = None adapter_names: Optional[Union[str, List[str]]] = None scales: Optional[Union[float, List[float]]] = None - + def __post_init__(self): if isinstance(self.model_ids, str): self.model_ids = [self.model_ids,] @@ -86,6 +86,44 @@ class InputShapesArguments: image_encoder_shapes: Optional[ImageEncoderArguments] = None +class DataclassParser: + def __init__(self, **kwargs): + for name, cls in self.__class__.__annotations__.items(): + if is_dataclass(cls): + parsed_kwargs = {k: v for k, v in kwargs.items() if k in {f.name for f in fields(cls)}} + setattr(self, f"{name}", cls(**parsed_kwargs)) + + +class NeuronArgumentParser(DataclassParser): + input_shapes: InputShapesArguments + + def __init__(self, **kwargs): + super().__init__(**kwargs) + for name, value in kwargs.items(): + if value is not None: + setattr(self, name, value) + + @property + def lora_args(self): + _lora_args = LoRAAdapterArguments( + model_ids=getattr(self, "lora_model_ids", None), + weight_names=getattr(self, "lora_weight_names", None), + adapter_names=getattr(self, "lora_adapter_names", None), + scales=getattr(self, "lora_scales", None), + ) + return _lora_args + + @property + def ip_adapter_args(self): + _ip_adapter_args = IPAdapterArguments( + model_id=getattr(self, "ip_adapter_id", None), + subfolder=getattr(self, "ip_adapter_subfolder", None), + weight_name=getattr(self, "ip_adapter_weight_name", None), + scale=getattr(self, "ip_adapter_scale", None), + ) + return _ip_adapter_args + + def validate_arg( args, arg_name: str, diff --git a/optimum/neuron/utils/input_generators.py b/optimum/neuron/utils/input_generators.py index 98b92db4f..81d69ac2d 100644 --- a/optimum/neuron/utils/input_generators.py +++ b/optimum/neuron/utils/input_generators.py @@ -14,7 +14,7 @@ # limitations under the License. """Dummy input generation classes.""" -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import torch @@ -195,7 +195,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int if input_name == "image_enc_hidden_states": shape = [self.batch_size, 1, self.image_encoder_shapes.sequence_length, self.image_encoder_shapes.hidden_size] return self.random_float_tensor(shape, framework=framework, dtype=float_dtype) - elif input_name == "image_embeds": + elif input_name == "image_embeds": shape = [self.batch_size, 1, self.image_encoder_shapes.projection_dim] return self.random_float_tensor(shape, framework=framework, dtype=float_dtype) elif input_name == "ip_adapter_masks": diff --git a/tests/exporters/test_export.py b/tests/exporters/test_export.py index 9a7b3e403..97a10215d 100644 --- a/tests/exporters/test_export.py +++ b/tests/exporters/test_export.py @@ -17,32 +17,42 @@ import random import unittest from pathlib import Path -from tempfile import TemporaryDirectory +from tempfile import NamedTemporaryFile, TemporaryDirectory from typing import Dict, List, Optional from parameterized import parameterized -from transformers import set_seed +from transformers import AutoConfig, AutoModelForSeq2SeqLM, set_seed from transformers.testing_utils import require_vision from optimum.exporters.neuron import ( + NeuronDefaultConfig, build_stable_diffusion_components_mandatory_shapes, + export, export_models, + validate_model_outputs, validate_models_outputs, ) from optimum.exporters.neuron.__main__ import get_submodels_and_neuron_configs from optimum.exporters.neuron.model_configs import * # noqa: F403 from optimum.exporters.tasks import TasksManager +from optimum.neuron.utils import is_neuron_available from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available, logging -from optimum.utils.testing_utils import require_diffusers +from optimum.utils.testing_utils import require_diffusers, require_sentence_transformers from .exporters_utils import ( + ENCODER_DECODER_MODELS_TINY, + EXPORT_MODELS_TINY, + EXTREA_DEFAULT_DUMMY_SHAPES, + LORA_WEIGHTS_TINY, + SENTENCE_TRANSFORMERS_MODELS, STABLE_DIFFUSION_MODELS_TINY, + WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH, ) if is_diffusers_available(): - from diffusers import StableDiffusionXLPipeline + from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline SEED = 42 @@ -94,100 +104,100 @@ def _get_models_to_test( return sorted(models_to_test) -# class NeuronExportTestCase(unittest.TestCase): -# """ -# Integration tests ensuring supported models are correctly exported. -# """ - -# if is_neuron_available(): -# # Deberta has 'XSoftmax' unsupported on INF1 -# for model in ["deberta", "deberta-v2"]: -# EXPORT_MODELS_TINY.pop(model) - -# def _neuronx_export( -# self, -# test_name: str, -# model_type: str, -# model_name: str, -# task: str, -# neuron_config_constructor: "NeuronDefaultConfig", -# dynamic_batch_size: bool = False, -# inline_weights_to_neff: bool = True, -# ): -# library_name = TasksManager.infer_library_from_model(model_name) -# if library_name == "sentence_transformers": -# model_class = TasksManager.get_model_class_for_task(task, framework="pt", library=library_name) -# model = model_class(model_name) -# reference_model = model_class(model_name) -# if "clip" in model[0].__class__.__name__.lower(): -# config = model[0].model.config -# else: -# config = model[0].auto_model.config -# else: -# model_class = TasksManager.get_model_class_for_task(task, model_type=model_type, framework="pt") -# config = AutoConfig.from_pretrained(model_name) -# model = model_class.from_config(config) -# reference_model = model_class.from_config(config) - -# mandatory_shapes = { -# name: DEFAULT_DUMMY_SHAPES.get(name) or EXTREA_DEFAULT_DUMMY_SHAPES.get(name) -# for name in neuron_config_constructor.func.get_mandatory_axes_for_task(task) -# } -# neuron_config = neuron_config_constructor( -# config=config, task=task, dynamic_batch_size=dynamic_batch_size, **mandatory_shapes -# ) - -# atol = neuron_config.ATOL_FOR_VALIDATION - -# with NamedTemporaryFile("w") as output: -# try: -# _, neuron_outputs = export( -# model_or_path=model, -# config=neuron_config, -# output=Path(output.name), -# inline_weights_to_neff=inline_weights_to_neff, -# ) - -# validate_model_outputs( -# config=neuron_config, -# reference_model=reference_model, -# neuron_model_path=Path(output.name), -# neuron_named_outputs=neuron_outputs, -# atol=atol, -# ) -# except (RuntimeError, ValueError) as e: -# self.fail(f"{model_type}, {task} -> {e}") - -# @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers")) -# @is_inferentia_test -# def test_export(self, test_name, name, model_name, task, neuron_config_constructor): -# self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor) - -# @parameterized.expand( -# _get_models_to_test( -# EXPORT_MODELS_TINY, -# exclude_model_types=WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH, -# library_name="transformers", -# ) -# ) -# @is_inferentia_test -# def test_export_separated_weights(self, test_name, name, model_name, task, neuron_config_constructor): -# self._neuronx_export( -# test_name, name, model_name, task, neuron_config_constructor, inline_weights_to_neff=False -# ) - -# @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS, library_name="sentence_transformers")) -# @is_inferentia_test -# @require_sentence_transformers -# @requires_neuronx -# def test_export_sentence_transformers(self, test_name, name, model_name, task, neuron_config_constructor): -# self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor) - -# @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers"), skip_on_empty=True) -# @is_inferentia_test -# @requires_neuronx -# def test_export_with_dynamic_batch_size(self, test_name, name, model_name, task, neuron_config_constructor): -# self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor, dynamic_batch_size=True) +class NeuronExportTestCase(unittest.TestCase): + """ + Integration tests ensuring supported models are correctly exported. + """ + + if is_neuron_available(): + # Deberta has 'XSoftmax' unsupported on INF1 + for model in ["deberta", "deberta-v2"]: + EXPORT_MODELS_TINY.pop(model) + + def _neuronx_export( + self, + test_name: str, + model_type: str, + model_name: str, + task: str, + neuron_config_constructor: "NeuronDefaultConfig", + dynamic_batch_size: bool = False, + inline_weights_to_neff: bool = True, + ): + library_name = TasksManager.infer_library_from_model(model_name) + if library_name == "sentence_transformers": + model_class = TasksManager.get_model_class_for_task(task, framework="pt", library=library_name) + model = model_class(model_name) + reference_model = model_class(model_name) + if "clip" in model[0].__class__.__name__.lower(): + config = model[0].model.config + else: + config = model[0].auto_model.config + else: + model_class = TasksManager.get_model_class_for_task(task, model_type=model_type, framework="pt") + config = AutoConfig.from_pretrained(model_name) + model = model_class.from_config(config) + reference_model = model_class.from_config(config) + + mandatory_shapes = { + name: DEFAULT_DUMMY_SHAPES.get(name) or EXTREA_DEFAULT_DUMMY_SHAPES.get(name) + for name in neuron_config_constructor.func.get_mandatory_axes_for_task(task) + } + neuron_config = neuron_config_constructor( + config=config, task=task, dynamic_batch_size=dynamic_batch_size, **mandatory_shapes + ) + + atol = neuron_config.ATOL_FOR_VALIDATION + + with NamedTemporaryFile("w") as output: + try: + _, neuron_outputs = export( + model_or_path=model, + config=neuron_config, + output=Path(output.name), + inline_weights_to_neff=inline_weights_to_neff, + ) + + validate_model_outputs( + config=neuron_config, + reference_model=reference_model, + neuron_model_path=Path(output.name), + neuron_named_outputs=neuron_outputs, + atol=atol, + ) + except (RuntimeError, ValueError) as e: + self.fail(f"{model_type}, {task} -> {e}") + + @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers")) + @is_inferentia_test + def test_export(self, test_name, name, model_name, task, neuron_config_constructor): + self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor) + + @parameterized.expand( + _get_models_to_test( + EXPORT_MODELS_TINY, + exclude_model_types=WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH, + library_name="transformers", + ) + ) + @is_inferentia_test + def test_export_separated_weights(self, test_name, name, model_name, task, neuron_config_constructor): + self._neuronx_export( + test_name, name, model_name, task, neuron_config_constructor, inline_weights_to_neff=False + ) + + @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS, library_name="sentence_transformers")) + @is_inferentia_test + @require_sentence_transformers + @requires_neuronx + def test_export_sentence_transformers(self, test_name, name, model_name, task, neuron_config_constructor): + self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor) + + @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers"), skip_on_empty=True) + @is_inferentia_test + @requires_neuronx + def test_export_with_dynamic_batch_size(self, test_name, name, model_name, task, neuron_config_constructor): + self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor, dynamic_batch_size=True) @is_inferentia_test @@ -199,38 +209,38 @@ class NeuronStableDiffusionExportTestCase(unittest.TestCase): Integration tests ensuring stable diffusion models are correctly exported. """ - # @parameterized.expand( - # [STABLE_DIFFUSION_MODELS_TINY["stable-diffusion"], STABLE_DIFFUSION_MODELS_TINY["latent-consistency"]] - # ) - # def test_export_for_stable_diffusion_models(self, model_id): - # set_seed(SEED) - - # # prepare neuron config / models - # model = StableDiffusionPipeline.from_pretrained(model_id) - # input_shapes = build_stable_diffusion_components_mandatory_shapes( - # **{"batch_size": 1, "height": 64, "width": 64, "num_images_per_prompt": 4} - # ) - - # with TemporaryDirectory() as tmpdirname: - # models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( - # model=model, - # input_shapes=input_shapes, - # task="text-to-image", - # library_name="diffusers", - # output=Path(tmpdirname), - # model_name_or_path=model_id, - # ) - # _, neuron_outputs = export_models( - # models_and_neuron_configs=models_and_neuron_configs, - # output_dir=Path(tmpdirname), - # output_file_names=output_model_names, - # ) - # validate_models_outputs( - # models_and_neuron_configs=models_and_neuron_configs, - # neuron_named_outputs=neuron_outputs, - # output_dir=Path(tmpdirname), - # neuron_files_subpaths=output_model_names, - # ) + @parameterized.expand( + [STABLE_DIFFUSION_MODELS_TINY["stable-diffusion"], STABLE_DIFFUSION_MODELS_TINY["latent-consistency"]] + ) + def test_export_for_stable_diffusion_models(self, model_id): + set_seed(SEED) + + # prepare neuron config / models + model = StableDiffusionPipeline.from_pretrained(model_id) + input_shapes = build_stable_diffusion_components_mandatory_shapes( + **{"batch_size": 1, "height": 64, "width": 64, "num_images_per_prompt": 4} + ) + + with TemporaryDirectory() as tmpdirname: + models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( + model=model, + input_shapes=input_shapes, + task="text-to-image", + library_name="diffusers", + output=Path(tmpdirname), + model_name_or_path=model_id, + ) + _, neuron_outputs = export_models( + models_and_neuron_configs=models_and_neuron_configs, + output_dir=Path(tmpdirname), + output_file_names=output_model_names, + ) + validate_models_outputs( + models_and_neuron_configs=models_and_neuron_configs, + neuron_named_outputs=neuron_outputs, + output_dir=Path(tmpdirname), + neuron_files_subpaths=output_model_names, + ) @parameterized.expand([STABLE_DIFFUSION_MODELS_TINY["stable-diffusion-xl"]]) def test_export_for_stable_diffusion_xl_models(self, model_id): @@ -265,77 +275,77 @@ def test_export_for_stable_diffusion_xl_models(self, model_id): neuron_files_subpaths=output_model_names, ) - # def test_export_sd_with_fused_lora_weights(self): - # model_id = STABLE_DIFFUSION_MODELS_TINY["stable-diffusion"] - # lora_params = LORA_WEIGHTS_TINY["stable-diffusion"] - # set_seed(SEED) - - # # prepare neuron config / models - # model = StableDiffusionPipeline.from_pretrained(model_id) - # input_shapes = build_stable_diffusion_components_mandatory_shapes( - # **{"batch_size": 1, "height": 64, "width": 64, "num_images_per_prompt": 4} - # ) - - # with TemporaryDirectory() as tmpdirname: - # models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( - # model=model, - # input_shapes=input_shapes, - # task="text-to-image", - # library_name="diffusers", - # output=Path(tmpdirname), - # model_name_or_path=model_id, - # lora_model_ids=lora_params[0], - # lora_weight_names=lora_params[1], - # lora_adapter_names=lora_params[2], - # lora_scales=0.9, - # ) - # _, neuron_outputs = export_models( - # models_and_neuron_configs=models_and_neuron_configs, - # output_dir=Path(tmpdirname), - # output_file_names=output_model_names, - # ) - # validate_models_outputs( - # models_and_neuron_configs=models_and_neuron_configs, - # neuron_named_outputs=neuron_outputs, - # output_dir=Path(tmpdirname), - # neuron_files_subpaths=output_model_names, - # ) - - -# @is_inferentia_test -# @requires_neuronx -# class NeuronEncoderDecoderExportTestCase(unittest.TestCase): -# """ -# Integration tests ensuring encoder-decoder models are correctly exported. -# """ - -# @parameterized.expand(ENCODER_DECODER_MODELS_TINY.items()) -# def test_export_encoder_decoder_models(self, model_name, model_id): -# set_seed(SEED) - -# # prepare neuron config / models -# model = AutoModelForSeq2SeqLM.from_pretrained(model_id) -# input_shapes = {"batch_size": 1, "sequence_length": 18, "num_beams": 4} - -# with TemporaryDirectory() as tmpdirname: -# models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( -# model=model, -# input_shapes=input_shapes, -# task="text2text-generation", -# output=Path(tmpdirname), -# library_name="transformers", -# model_name_or_path=model_id, -# output_attentions=True, -# output_hidden_states=True, -# ) -# _, neuron_outputs = export_models( -# models_and_neuron_configs=models_and_neuron_configs, -# output_dir=Path(tmpdirname), -# output_file_names=output_model_names, -# ) -# validate_models_outputs( -# models_and_neuron_configs=models_and_neuron_configs, -# neuron_named_outputs=neuron_outputs, -# output_dir=Path(tmpdirname), -# neuron_files_subpaths=output_model_names, -# ) + def test_export_sd_with_fused_lora_weights(self): + model_id = STABLE_DIFFUSION_MODELS_TINY["stable-diffusion"] + lora_params = LORA_WEIGHTS_TINY["stable-diffusion"] + set_seed(SEED) + + # prepare neuron config / models + model = StableDiffusionPipeline.from_pretrained(model_id) + input_shapes = build_stable_diffusion_components_mandatory_shapes( + **{"batch_size": 1, "height": 64, "width": 64, "num_images_per_prompt": 4} + ) + + with TemporaryDirectory() as tmpdirname: + models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( + model=model, + input_shapes=input_shapes, + task="text-to-image", + library_name="diffusers", + output=Path(tmpdirname), + model_name_or_path=model_id, + lora_model_ids=lora_params[0], + lora_weight_names=lora_params[1], + lora_adapter_names=lora_params[2], + lora_scales=0.9, + ) + _, neuron_outputs = export_models( + models_and_neuron_configs=models_and_neuron_configs, + output_dir=Path(tmpdirname), + output_file_names=output_model_names, + ) + validate_models_outputs( + models_and_neuron_configs=models_and_neuron_configs, + neuron_named_outputs=neuron_outputs, + output_dir=Path(tmpdirname), + neuron_files_subpaths=output_model_names, + ) + + +@is_inferentia_test +@requires_neuronx +class NeuronEncoderDecoderExportTestCase(unittest.TestCase): + """ + Integration tests ensuring encoder-decoder models are correctly exported. + """ + + @parameterized.expand(ENCODER_DECODER_MODELS_TINY.items()) + def test_export_encoder_decoder_models(self, model_name, model_id): + set_seed(SEED) + + # prepare neuron config / models + model = AutoModelForSeq2SeqLM.from_pretrained(model_id) + input_shapes = {"batch_size": 1, "sequence_length": 18, "num_beams": 4} + + with TemporaryDirectory() as tmpdirname: + models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs( + model=model, + input_shapes=input_shapes, + task="text2text-generation", + output=Path(tmpdirname), + library_name="transformers", + model_name_or_path=model_id, + output_attentions=True, + output_hidden_states=True, + ) + _, neuron_outputs = export_models( + models_and_neuron_configs=models_and_neuron_configs, + output_dir=Path(tmpdirname), + output_file_names=output_model_names, + ) + validate_models_outputs( + models_and_neuron_configs=models_and_neuron_configs, + neuron_named_outputs=neuron_outputs, + output_dir=Path(tmpdirname), + neuron_files_subpaths=output_model_names, + )