Commit

Merge branch 'main' into energy_star_pr
IlyasMoutawwakil committed Nov 22, 2024
2 parents c18c9e5 + 31aa662 commit 3f7f1c1
Showing 15 changed files with 689 additions and 181 deletions.
42 changes: 42 additions & 0 deletions examples/pytorch_vlm.yaml
@@ -0,0 +1,42 @@
defaults:
  - benchmark
  - scenario: inference
  - launcher: process
  - backend: pytorch
  - _base_
  - _self_

name: pytorch_vlm

launcher:
  device_isolation: true
  device_isolation_action: warn

backend:
  device: cuda
  device_ids: 0
  no_weights: true
  torch_dtype: float16
  model: Qwen/Qwen2-VL-7B-Instruct

scenario:
  memory: true
  latency: true

  warmup_runs: 10
  iterations: 10
  duration: 10

  input_shapes:
    # text
    batch_size: 1
    sequence_length: 256
    # image
    num_images: 2
    num_channels: 3
    height: 224
    width: 224

  generate_kwargs:
    max_new_tokens: 32
    min_new_tokens: 32
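
Editor's note: a config like this is typically launched with the optimum-benchmark CLI, e.g. `optimum-benchmark --config-dir examples --config-name pytorch_vlm`. Below is a minimal sketch of the equivalent Python API usage, assuming the `Benchmark`, `BenchmarkConfig`, `ProcessConfig`, `InferenceConfig` and `PyTorchConfig` entry points documented in the project README; all field values are copied from the YAML above.

# Minimal sketch (not part of this commit) of the same benchmark via the Python API.
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig

if __name__ == "__main__":
    launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
    scenario_config = InferenceConfig(
        memory=True,
        latency=True,
        warmup_runs=10,
        iterations=10,
        duration=10,
        input_shapes={"batch_size": 1, "sequence_length": 256, "num_images": 2,
                      "num_channels": 3, "height": 224, "width": 224},
        generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32},
    )
    backend_config = PyTorchConfig(
        model="Qwen/Qwen2-VL-7B-Instruct",
        device="cuda",
        device_ids="0",
        no_weights=True,
        torch_dtype="float16",
    )
    benchmark_config = BenchmarkConfig(
        name="pytorch_vlm", scenario=scenario_config, launcher=launcher_config, backend=backend_config
    )
    benchmark_report = Benchmark.launch(benchmark_config)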
1 change: 0 additions & 1 deletion optimum_benchmark/backends/base.py
@@ -36,7 +36,6 @@
 class Backend(Generic[BackendConfigT], ABC):
     NAME: ClassVar[str]
 
-    model_type: str
     model_shapes: Dict[str, int]
 
     pretrained_model: PreTrainedModel
34 changes: 14 additions & 20 deletions optimum_benchmark/backends/timm_utils.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Any, Dict
 
 from transformers import PretrainedConfig
@@ -35,15 +36,17 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]:
     shapes = {}
 
     # image input
-    shapes["num_channels"] = artifacts_dict.get("num_channels", None)
-    if shapes["num_channels"] is None:
-        # processors have different names for the number of channels
+    if "num_channels" in artifacts_dict:
+        shapes["num_channels"] = artifacts_dict.get("num_channels", None)
+    elif "channels" in artifacts_dict:
         shapes["num_channels"] = artifacts_dict.get("channels", None)
 
-    image_size = artifacts_dict.get("image_size", None)
-    if image_size is None:
-        # processors have different names for the image size
-        image_size = artifacts_dict.get("size", None)
+    if "image_size" in artifacts_dict:
+        image_size = artifacts_dict["image_size"]
+    elif "size" in artifacts_dict:
+        image_size = artifacts_dict["size"]
+    else:
+        image_size = None
 
     if isinstance(image_size, (int, float)):
         shapes["height"] = image_size
@@ -57,24 +60,15 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]:
     elif isinstance(image_size, dict) and len(image_size) == 1:
         shapes["height"] = list(image_size.values())[0]
         shapes["width"] = list(image_size.values())[0]
-    else:
-        shapes["height"] = None
-        shapes["width"] = None
 
-    input_size = artifacts_dict.get("input_size", None)
-    if input_size is not None:
+    if "input_size" in artifacts_dict:
+        input_size = artifacts_dict.get("input_size", None)
         shapes["num_channels"] = input_size[0]
         shapes["height"] = input_size[1]
         shapes["width"] = input_size[2]
 
-    # classification labels
-    id2label = artifacts_dict.get("id2label", None)
-    if id2label is not None:
-        shapes["num_labels"] = len(id2label)
-
-    num_classes = artifacts_dict.get("num_classes", None)
-    if num_classes is not None:
-        shapes["num_labels"] = num_classes
+    if "num_classes" not in artifacts_dict:
+        warnings.warn("Could not extract shapes [num_channels, height, width] from timm model config.")
 
     return shapes
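
Editor's note: for illustration (values hypothetical), the membership checks above handle timm configs that expose an input_size tuple of (channels, height, width):

artifacts_dict = {"input_size": (3, 224, 224), "num_classes": 1000}

shapes = {}
if "input_size" in artifacts_dict:
    # unpack (channels, height, width)
    shapes["num_channels"], shapes["height"], shapes["width"] = artifacts_dict["input_size"]

print(shapes)  # {'num_channels': 3, 'height': 224, 'width': 224}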

149 changes: 96 additions & 53 deletions optimum_benchmark/backends/transformers_utils.py
@@ -1,4 +1,3 @@
-import warnings
 from contextlib import contextmanager
 from typing import Any, Dict, Optional, Union
 
@@ -7,6 +6,7 @@
 from transformers import (
     AutoConfig,
     AutoFeatureExtractor,
+    AutoImageProcessor,
     AutoProcessor,
     AutoTokenizer,
     FeatureExtractionMixin,
@@ -47,6 +47,7 @@
"image-to-text": "AutoModelForVision2Seq",
"text-generation": "AutoModelForCausalLM",
"text2text-generation": "AutoModelForSeq2SeqLM",
"image-text-to-text": "AutoModelForImageTextToText",
"visual-question-answering": "AutoModelForVisualQuestionAnswering",
"automatic-speech-recognition": ("AutoModelForSpeechSeq2Seq", "AutoModelForCTC"),
}
@@ -64,8 +65,11 @@
             model_loaders = (model_loaders,)
 
         for model_loader_name in model_loaders:
-            model_loader_class = getattr(transformers, model_loader_name)
-            TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name].update(model_loader_class._model_mapping._model_mapping)
+            model_loader_class = getattr(transformers, model_loader_name, None)
+            if model_loader_class is not None:
+                TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name].update(
+                    model_loader_class._model_mapping._model_mapping
+                )
 else:
     TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {}
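
Editor's note on the guarded getattr above: some loader names in the task map only exist in newer transformers releases, so the lookup must not raise on older installs. A small illustration (hypothetical, not part of the diff):

import transformers

# AutoModelForImageTextToText is only present in recent transformers releases;
# a bare getattr(transformers, name) would raise AttributeError on older ones.
loader = getattr(transformers, "AutoModelForImageTextToText", None)
if loader is None:
    print("loader unavailable in this transformers version; mapping is skipped")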

@@ -107,56 +111,83 @@ def get_transformers_pretrained_processor(model: str, **kwargs) -> Optional["PretrainedProcessor"]:
         return AutoFeatureExtractor.from_pretrained(model, **kwargs)
     except Exception:
         try:
-            return AutoTokenizer.from_pretrained(model, **kwargs)
+            return AutoImageProcessor.from_pretrained(model, **kwargs)
         except Exception:
-            return None
+            try:
+                return AutoTokenizer.from_pretrained(model, **kwargs)
+            except Exception:
+                return None
 
 
+def get_flat_dict(d: Dict[str, Any]) -> Dict[str, Any]:
+    flat_dict = {}
+    for k, v in d.items():
+        if isinstance(v, dict):
+            flat_dict.update(get_flat_dict(v))
+        else:
+            flat_dict[k] = v
+    return flat_dict
+
+
+def get_flat_artifact_dict(artifact: Union[PretrainedConfig, PretrainedProcessor]) -> Dict[str, Any]:
+    artifact_dict = {}
+
+    if isinstance(artifact, ProcessorMixin):
+        artifact_dict.update(
+            {k: v for k, v in artifact.__dict__.items() if isinstance(v, (int, str, float, bool, list, tuple, dict))}
+        )
+        for attribute in artifact.attributes:
+            artifact_dict.update(get_flat_artifact_dict(getattr(artifact, attribute)))
+    elif hasattr(artifact, "to_dict"):
+        artifact_dict.update(
+            {k: v for k, v in artifact.to_dict().items() if isinstance(v, (int, str, float, bool, list, tuple, dict))}
+        )
+    else:
+        artifact_dict.update(
+            {k: v for k, v in artifact.__dict__.items() if isinstance(v, (int, str, float, bool, list, tuple, dict))}
+        )
+
+    artifact_dict = get_flat_dict(artifact_dict)
+
+    return artifact_dict
+
+
 def extract_transformers_shapes_from_artifacts(
-    config: Optional["PretrainedConfig"] = None, processor: Optional["PretrainedProcessor"] = None
+    config: Optional["PretrainedConfig"] = None,
+    processor: Optional["PretrainedProcessor"] = None,
 ) -> Dict[str, Any]:
-    artifacts_dict = {}
+    flat_artifacts_dict = {}
 
-    if config is not None and hasattr(config, "to_dict"):
-        config_dict = {k: v for k, v in config.to_dict().items() if v is not None}
-        artifacts_dict.update(config_dict)
-    elif config is not None:
-        try:
-            config_dict = {k: getattr(config, k) for k in dir(config) if isinstance(getattr(config, k), int)}
-            artifacts_dict.update(config_dict)
-        except Exception:
-            warnings.warn(f"Could not extract shapes from config {config}")
+    if config is not None:
+        flat_artifacts_dict.update(get_flat_artifact_dict(config))
 
-    if processor is not None and hasattr(processor, "to_dict"):
-        processor_dict = {k: v for k, v in processor.to_dict().items() if v is not None}
-        artifacts_dict.update(processor_dict)
-    elif processor is not None:
-        try:
-            processor_dict = {
-                k: getattr(processor, k) for k in dir(processor) if isinstance(getattr(processor, k), int)
-            }
-        except Exception:
-            warnings.warn(f"Could not extract shapes from processor {processor}")
+    if processor is not None:
+        flat_artifacts_dict.update(get_flat_artifact_dict(processor))
 
     shapes = {}
 
     # text input
-    shapes["vocab_size"] = artifacts_dict.get("vocab_size", None)
-    shapes["type_vocab_size"] = artifacts_dict.get("type_vocab_size", None)
-    shapes["max_position_embeddings"] = artifacts_dict.get("max_position_embeddings", None)
-    if shapes["max_position_embeddings"] is None:
-        shapes["max_position_embeddings"] = artifacts_dict.get("n_positions", None)
+    if "vocab_size" in flat_artifacts_dict:
+        shapes["vocab_size"] = flat_artifacts_dict["vocab_size"]
+
+    if "type_vocab_size" in flat_artifacts_dict:
+        shapes["type_vocab_size"] = flat_artifacts_dict["type_vocab_size"]
+
+    if "max_position_embeddings" in flat_artifacts_dict:
+        shapes["max_position_embeddings"] = flat_artifacts_dict["max_position_embeddings"]
+    elif "n_positions" in flat_artifacts_dict:
+        shapes["max_position_embeddings"] = flat_artifacts_dict["n_positions"]
 
     # image input
-    shapes["num_channels"] = artifacts_dict.get("num_channels", None)
-    if shapes["num_channels"] is None:
-        # processors have different names for the number of channels
-        shapes["num_channels"] = artifacts_dict.get("channels", None)
+    if "num_channels" in flat_artifacts_dict:
+        shapes["num_channels"] = flat_artifacts_dict["num_channels"]
 
-    image_size = artifacts_dict.get("image_size", None)
-    if image_size is None:
-        # processors have different names for the image size
-        image_size = artifacts_dict.get("size", None)
+    if "image_size" in flat_artifacts_dict:
+        image_size = flat_artifacts_dict["image_size"]
+    elif "size" in flat_artifacts_dict:
+        image_size = flat_artifacts_dict["size"]
+    else:
+        image_size = None
 
     if isinstance(image_size, (int, float)):
         shapes["height"] = image_size
@@ -170,29 +201,41 @@ def extract_transformers_shapes_from_artifacts
     elif isinstance(image_size, dict) and len(image_size) == 1:
         shapes["height"] = list(image_size.values())[0]
         shapes["width"] = list(image_size.values())[0]
-    else:
-        shapes["height"] = None
-        shapes["width"] = None
 
-    input_size = artifacts_dict.get("input_size", None)
-    if input_size is not None:
+    if "input_size" in flat_artifacts_dict:
+        input_size = flat_artifacts_dict["input_size"]
         shapes["num_channels"] = input_size[0]
         shapes["height"] = input_size[1]
         shapes["width"] = input_size[2]
 
     # classification labels
-    id2label = artifacts_dict.get("id2label", None)
-    if id2label is not None:
+    if "id2label" in flat_artifacts_dict:
+        id2label = flat_artifacts_dict["id2label"]
         shapes["num_labels"] = len(id2label)
 
-    num_classes = artifacts_dict.get("num_classes", None)
-    if num_classes is not None:
-        shapes["num_labels"] = num_classes
+    elif "num_classes" in flat_artifacts_dict:
+        shapes["num_labels"] = flat_artifacts_dict["num_classes"]
 
     # object detection labels
-    shapes["num_queries"] = artifacts_dict.get("num_queries", None)
-    if shapes["num_queries"] == 0:
-        shapes["num_queries"] = 2
+    if "num_queries" in flat_artifacts_dict:
+        shapes["num_queries"] = flat_artifacts_dict["num_queries"]
 
+    # image-text input
+
+    if "patch_size" in flat_artifacts_dict:
+        shapes["patch_size"] = flat_artifacts_dict["patch_size"]
+    if "in_chans" in flat_artifacts_dict:
+        shapes["num_channels"] = flat_artifacts_dict["in_chans"]
+    if "image_seq_len" in flat_artifacts_dict:
+        shapes["image_seq_len"] = flat_artifacts_dict["image_seq_len"]
+    if "image_token_id" in flat_artifacts_dict:
+        shapes["image_token_id"] = flat_artifacts_dict["image_token_id"]
+    if "spatial_merge_size" in flat_artifacts_dict:
+        shapes["spatial_merge_size"] = flat_artifacts_dict["spatial_merge_size"]
+    if "do_image_splitting" in flat_artifacts_dict:
+        shapes["do_image_splitting"] = flat_artifacts_dict["do_image_splitting"]
+
+    if "temporal_patch_size" in flat_artifacts_dict:
+        shapes["temporal_patch_size"] = flat_artifacts_dict["temporal_patch_size"]
 
     return shapes
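
Editor's note, first of two usage sketches for the new helpers (outside the diff): get_flat_dict hoists nested values to the top level and drops the nesting key itself ("size" below); values are hypothetical.

>>> get_flat_dict({"size": {"height": 224, "width": 224}, "vocab_size": 32000})
{'height': 224, 'width': 224, 'vocab_size': 32000}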

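Second, a sketch of how the rewritten extraction would be used for the model benchmarked in examples/pytorch_vlm.yaml; which keys come back depends on the model's config and processor, so the expectations below are indicative only:

from transformers import AutoConfig, AutoProcessor

config = AutoConfig.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

shapes = extract_transformers_shapes_from_artifacts(config=config, processor=processor)
# expect entries such as vocab_size, max_position_embeddings, image_token_id,
# spatial_merge_size and temporal_patch_size for this model family
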
52 changes: 52 additions & 0 deletions optimum_benchmark/generators/base.py
@@ -0,0 +1,52 @@
import logging
import random
import string
from abc import ABC
from typing import Dict, List, Tuple

import torch

LOGGER = logging.getLogger("generators")


class BaseGenerator(ABC):
    def __init__(self, shapes: Dict[str, int], with_labels: bool):
        self.shapes = shapes
        self.with_labels = with_labels

    def assert_not_missing_shapes(self, required_shapes: List[str]):
        for shape in required_shapes:
            assert self.shapes.get(shape, None) is not None, (
                f"{shape} either couldn't be inferred automatically from model artifacts or should be provided by the user. "
                f"Please provide it under `scenario.input_shapes.{shape}` or open an issue/PR in optimum-benchmark repository. "
            )

    @staticmethod
    def generate_constant_integers(value: int, shape: Tuple[int]):
        return torch.full(shape, value, dtype=torch.int64)

    @staticmethod
    def generate_constant_floats(value: float, shape: Tuple[int]):
        return torch.full(shape, value, dtype=torch.float32)

    @staticmethod
    def generate_random_integers(min_value: int, max_value: int, shape: Tuple[int]):
        return torch.randint(min_value, max_value, shape)

    @staticmethod
    def generate_random_floats(min_value: float, max_value: float, shape: Tuple[int]):
        return torch.rand(shape) * (max_value - min_value) + min_value

    @staticmethod
    def generate_ranges(start: int, stop: int, shape: Tuple[int]):
        return torch.arange(start, stop).repeat(shape[0], 1)

    @staticmethod
    def generate_random_strings(num_seq: int) -> List[str]:
        return [
            "".join(random.choice(string.ascii_letters + string.digits) for _ in range(random.randint(10, 100)))
            for _ in range(num_seq)
        ]

    def __call__(self):
        raise NotImplementedError("Generator must implement __call__ method")
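
Editor's note: a quick sketch of the static helpers in use; the shapes are hypothetical and mirror the VLM example above.

input_ids = BaseGenerator.generate_random_integers(0, 32000, (1, 256))  # random token ids in [0, 32000)
attention_mask = BaseGenerator.generate_constant_integers(1, (1, 256))  # all-ones mask
pixel_values = BaseGenerator.generate_random_floats(0.0, 1.0, (2, 3, 224, 224))  # 2 images, CHW
position_ids = BaseGenerator.generate_ranges(0, 256, (1, 256))  # one 0..255 range per row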
