Add ONNX support for Embeddings and Pipelines, closes #109
davidmezzetti committed Aug 31, 2021
1 parent f2512f5 commit 2ed7c7f
Showing 13 changed files with 250 additions and 72 deletions.
26 changes: 8 additions & 18 deletions docs/pipelines/onnx.md
@@ -1,33 +1,23 @@
 # HFOnnx
 
-Exports a Hugging Face Transformer model to ONNX.
+Exports a Hugging Face Transformer model to ONNX. Currently, this works best with classification/pooling/qa models. Work is ongoing for sequence to
+sequence models (summarization, transcription, translation).
 
 Example on how to use the pipeline below.
 
 ```python
-from onnxruntime import InferenceSession, SessionOptions
-from transformers import AutoTokenizer
-
-from txtai.pipeline import HFOnnx
+from txtai.pipeline import HFOnnx, Labels
 
-# Normalize logits using sigmoid function
-sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
+# Model path
+path = "distilbert-base-uncased-finetuned-sst-2-english"
 
 # Export model to ONNX
 onnx = HFOnnx()
-model = onnx("distilbert-base-uncased-finetuned-sst-2-english", "sequence-classification", "model.onnx", True)
-
-# Build ONNX session
-options = SessionOptions()
-session = InferenceSession(model, options)
-
-# Tokenize
-tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
-tokens = tokenizer(["I am happy"], return_tensors="np")
+model = onnx(path, "text-classification", "model.onnx", True)
 
 # Run inference and validate
-outputs = session.run(None, dict(tokens))
-outputs = sigmoid(outputs[0])
+labels = Labels((model, path), dynamic=False)
+labels("I am happy")
 ```

::: txtai.pipeline.HFOnnx.__init__
5 changes: 3 additions & 2 deletions setup.py
@@ -19,6 +19,8 @@
     "uvicorn>=0.12.1",
 ]
 
+extras["model"] = ["onnxruntime>=1.8.1"]
+
 extras["pipeline"] = [
     "fasttext>=0.9.2",
     "nltk>=3.5",
@@ -33,15 +35,14 @@
     "annoy>=1.16.3",
     "fasttext>=0.9.2",
     "hnswlib>=0.5.0",
-    "onnxruntime>=1.8.1",
     "pymagnitude-lite>=0.1.43",
     "scikit-learn>=0.23.1",
     "sentence-transformers>=2.0.0",
 ]
 
 extras["workflow"] = ["apache-libcloud>=3.3.1", "pillow>=7.2.0", "requests>=2.24.0"]
 
-extras["all"] = extras["api"] + extras["pipeline"] + extras["similarity"] + extras["workflow"]
+extras["all"] = extras["api"] + extras["model"] + extras["pipeline"] + extras["similarity"] + extras["workflow"]

setup(
name="txtai",
1 change: 1 addition & 0 deletions src/python/txtai/models/__init__.py
@@ -3,4 +3,5 @@
 """
 
 from .models import Models
+from .onnx import OnnxModel
 from .pooling import MeanPooling, Pooling
39 changes: 39 additions & 0 deletions src/python/txtai/models/models.py
@@ -2,8 +2,14 @@
 Models module
 """
 
+import os
+
 import torch
 
+from transformers import AutoModel, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
+
+from .onnx import OnnxModel
+
 
 class Models:
     """
@@ -81,3 +87,36 @@ def reference(deviceid):
         """
 
         return "cpu" if deviceid < 0 else "cuda:{}".format(deviceid)
+
+    @staticmethod
+    def load(path, task="default"):
+        """
+        Loads a machine learning model. Handles multiple model frameworks (ONNX, Transformers).
+
+        Args:
+            path: path to model
+            task: task name used to lookup model configuration
+
+        Returns:
+            machine learning model
+        """
+
+        # Detect ONNX models
+        if isinstance(path, bytes) or (isinstance(path, str) and os.path.isfile(path)):
+            return OnnxModel(path)
+
+        # Return path, if path isn't a string
+        if not isinstance(path, str):
+            return path
+
+        # Transformer models
+        config = {
+            "default": AutoModel.from_pretrained,
+            "question-answering": AutoModelForQuestionAnswering.from_pretrained,
+            "summarization": AutoModelForSeq2SeqLM.from_pretrained,
+            "text-classification": AutoModelForSequenceClassification.from_pretrained,
+            "zero-shot-classification": AutoModelForSequenceClassification.from_pretrained,
+        }
+
+        # Load model for supported tasks. Return path for unsupported tasks.
+        return config[task](path) if task in config else path
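
A quick sketch of how this dispatch behaves (file and model names hypothetical; assumes the new onnxruntime "model" extra is installed):

```python
from txtai.models import Models

# Bytes or an existing local file are treated as ONNX models
model = Models.load("model.onnx")

# Hub ids dispatch on the task name via the config table above
classifier = Models.load("distilbert-base-uncased-finetuned-sst-2-english", "text-classification")

# Unsupported task names fall through and return the path unchanged
path = Models.load("t5-small", "translation")
```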
104 changes: 104 additions & 0 deletions src/python/txtai/models/onnx.py
@@ -0,0 +1,104 @@
"""
ONNX module
"""

# Conditional import
try:
from onnxruntime import InferenceSession, SessionOptions

ONNX_RUNTIME = True
except ImportError:
ONNX_RUNTIME = False

import numpy as np
import torch

from transformers.configuration_utils import PretrainedConfig
from transformers.models.auto.modeling_auto import (
MODEL_MAPPING,
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
)
from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING
from transformers.modeling_utils import PreTrainedModel

# pylint: disable=W0223
class OnnxModel(PreTrainedModel):
"""
Provides a Transformers/PyTorch compatible interface for ONNX models. Handles casting inputs
and outputs with minimal to no copying of data.
"""

def __init__(self, model):
"""
Creates a new OnnxModel.
Args:
model: path to model or InferenceSession
"""

if not ONNX_RUNTIME:
raise ImportError('onnxruntime is not available - install "model" extra to enable')

super().__init__(PretrainedConfig())

# Create ONNX session
self.model = InferenceSession(model, SessionOptions())

# Add references for this class to supported AutoModel classes
name = self.__class__.__name__
if name not in MODEL_MAPPING:
MODEL_MAPPING[name] = self.__class__
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING[name] = self.__class__
MODEL_FOR_QUESTION_ANSWERING_MAPPING[name] = self.__class__

# Add references for this class to support pipeline AutoTokenizers
if type(self.config) not in TOKENIZER_MAPPING:
TOKENIZER_MAPPING[type(self.config)] = None

def forward(self, **inputs):
"""
Runs inputs through an ONNX model and returns outputs. This method handles casting inputs
and outputs between torch tensors and numpy arrays as shared memory (no copy).
Args:
inputs: model inputs
Returns:
model outputs
"""

inputs = self.parse(inputs)

# Run inputs through ONNX model
results = self.model.run(None, inputs)

# pylint: disable=E1101
return torch.from_numpy(np.array(results))

def parse(self, inputs):
"""
Parse model inputs and handle converting to ONNX compatible inputs.
Args:
inputs: model inputs
Returns:
ONNX compatible model inputs
"""

features = {}

# Select features from inputs
for key in ["input_ids", "attention_mask", "token_type_ids"]:
if key in inputs:
value = inputs[key]

# Cast torch tensors to numpy
if hasattr(value, "cpu"):
value = value.cpu().numpy()

# Cast to numpy array if not already one
features[key] = np.asarray(value)

return features
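
A rough usage sketch of the class above (file name hypothetical, assuming a model exported by HFOnnx and its matching tokenizer); it drops in where a PreTrainedModel is expected:

```python
from transformers import AutoTokenizer
from txtai.models import OnnxModel

# Wrap an exported ONNX file in a Transformers-compatible model
model = OnnxModel("model.onnx")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")

# parse() casts torch tensors to numpy for onnxruntime, forward() wraps results back in a tensor
tokens = tokenizer(["I am happy"], return_tensors="pt")
outputs = model(**tokens)
```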
11 changes: 6 additions & 5 deletions src/python/txtai/models/pooling.py
@@ -7,7 +7,7 @@
 
 from torch import nn
 
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoTokenizer
 
 from .models import Models
 
@@ -17,21 +17,22 @@ class Pooling(nn.Module):
     Builds pooled vectors using outputs from a transformers model.
     """
 
-    def __init__(self, path, device, batch=32, maxlength=None):
+    def __init__(self, path, device, tokenizer=None, batch=32, maxlength=None):
         """
         Creates a new Pooling model.
 
         Args:
-            path: path to transformers model
+            path: path to model, accepts Hugging Face model hub id or local path
             device: tensor device id
+            tokenizer: optional path to tokenizer
             batch: batch size
             maxlength: max sequence length
         """
 
         super().__init__()
 
-        self.model = AutoModel.from_pretrained(path)
-        self.tokenizer = AutoTokenizer.from_pretrained(path)
+        self.model = Models.load(path)
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer if tokenizer else path)
        self.device = Models.device(device)
 
         # Detect unbounded tokenizer typically found in older models
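
Since an exported ONNX file carries no tokenizer files, the new tokenizer parameter lets the vocabulary load from a different location than the weights; a minimal sketch (paths hypothetical, assuming -1 selects the CPU device as elsewhere in Models):

```python
from txtai.models import Pooling

# Weights from a local ONNX export, tokenizer from the original hub id
pooling = Pooling("embeddings.onnx", device=-1, tokenizer="sentence-transformers/bert-base-nli-mean-tokens")
```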
15 changes: 8 additions & 7 deletions src/python/txtai/pipeline/hfonnx.py
@@ -18,8 +18,7 @@
 
 from torch.onnx import export
 
-from transformers import AutoModel, AutoModelForCausalLM, AutoModelForMultipleChoice, AutoModelForQuestionAnswering
-from transformers import AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoTokenizer
+from transformers import AutoModel, AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoTokenizer
 
 from ..models import MeanPooling
 from .tensors import Tensors
 
@@ -40,6 +39,9 @@ def __call__(self, path, task="default", output=None, quantize=False, opset=12):
             output: optional output model path, defaults to return byte array if None
             quantize: if model should be quantized (requires onnx to be installed), defaults to False
             opset: onnx opset, defaults to 12
+
+        Returns:
+            path to model output or model as bytes depending on output parameter
         """
 
         inputs, outputs, model = self.parameters(task)
@@ -141,12 +143,7 @@ def parameters(self, task):
 
         config = {
             "default": (OrderedDict({"last_hidden_state": {0: "batch", 1: "sequence"}}), AutoModel.from_pretrained),
-            "causal-lm": (OrderedDict({"logits": {0: "batch", 1: "sequence"}}), AutoModelForCausalLM.from_pretrained),
             "pooling": (OrderedDict({"embeddings": {0: "batch", 1: "sequence"}}), lambda x: MeanPoolingOnnx(x, -1)),
-            "seq2seq-lm": (OrderedDict({"logits": {0: "batch", 1: "decoder_sequence"}}), AutoModelForSeq2SeqLM.from_pretrained),
-            "sequence-classification": (OrderedDict({"logits": {0: "batch"}}), AutoModelForSequenceClassification.from_pretrained),
-            "token-classification": (OrderedDict({"logits": {0: "batch", 1: "sequence"}}), AutoModelForTokenClassification.from_pretrained),
-            "multiple-choice": (OrderedDict({"logits": {0: "batch"}}), AutoModelForMultipleChoice.from_pretrained),
             "question-answering": (
                 OrderedDict(
                     {
                         ...
                     }
                 ),
                 AutoModelForQuestionAnswering.from_pretrained,
             ),
+            "text-classification": (OrderedDict({"logits": {0: "batch"}}), AutoModelForSequenceClassification.from_pretrained),
         }
 
+        # Aliases
+        config["zero-shot-classification"] = config["text-classification"]
+
         return (inputs,) + config[task]
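
With the task table trimmed to the supported set, the remaining names cover embedding, classification and QA exports; a sketch of a pooling export (model id and file name hypothetical):

```python
from txtai.pipeline import HFOnnx

onnx = HFOnnx()

# "pooling" wraps the base model with MeanPoolingOnnx so the ONNX graph outputs embeddings directly
model = onnx("sentence-transformers/bert-base-nli-mean-tokens", "pooling", "embeddings.onnx", quantize=True)
```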
2 changes: 1 addition & 1 deletion src/python/txtai/pipeline/hfpipeline.py
@@ -36,7 +36,7 @@ def __init__(self, task, path=None, quantize=False, gpu=False, model=None):
 
         # Transformer pipeline task
         if isinstance(path, tuple):
-            self.pipeline = pipeline(task, model=path[0], tokenizer=path[1], device=deviceid)
+            self.pipeline = pipeline(task, model=Models.load(path[0], task), tokenizer=path[1], device=deviceid)
         else:
             self.pipeline = pipeline(task, model=path, tokenizer=path, device=deviceid)
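
Because txtai's Hugging Face pipelines all route through HFPipeline, this one-line change gives each of them ONNX support via the (model, tokenizer) tuple form; a sketch with extractive QA (model id hypothetical, assuming the Questions pipeline):

```python
from txtai.pipeline import HFOnnx, Questions

path = "distilbert-base-cased-distilled-squad"

# Export QA model, keeping it in memory as bytes (no output path)
model = HFOnnx()(path, "question-answering")

# Model bytes route through Models.load, tokenizer loads from the hub id
questions = Questions((model, path))
questions(["What color is the sky"], ["The sky is blue"])
```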
4 changes: 1 addition & 3 deletions src/python/txtai/vectors/factory.py
@@ -2,8 +2,6 @@
 Factory module
 """
 
-import os
-
 from .transformers import TransformersVectors
 from .words import WordVectors, WORDS
 
@@ -58,6 +56,6 @@ def method(config):
 
         # Infer method from path, if blank
         if not method and path:
-            method = "words" if os.path.isfile(path) else "transformers"
+            method = "words" if WordVectors.isDatabase(path) else "transformers"
 
         return method
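
This matters because a local file path can now be an ONNX model: the old os.path.isfile check would have routed any local file to word vectors. A sketch of the new routing (file names hypothetical, assuming this module's VectorsFactory class):

```python
from txtai.vectors.factory import VectorsFactory

# SQLite-backed word vectors file resolves to "words"
VectorsFactory.method({"path": "vectors.magnitude"})

# A local ONNX file is not SQLite, so it resolves to "transformers"
VectorsFactory.method({"path": "embeddings.onnx"})
```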
8 changes: 6 additions & 2 deletions src/python/txtai/vectors/transformers.py
@@ -2,6 +2,7 @@
 Transformers module
 """
 
+import os
 import pickle
 import tempfile
 
@@ -14,7 +15,7 @@
     SENTENCE_TRANSFORMERS = False
 
 from .base import Vectors
-from ..models import MeanPooling, Models
+from ..models import MeanPooling, Models, Pooling
 from ..pipeline.tokenizer import Tokenizer
 
@@ -32,7 +33,10 @@ def load(self, path):
 
         # Build embeddings with transformers (default)
         if transformers:
-            return MeanPooling(path, device=deviceid)
+            if isinstance(path, bytes) or (isinstance(path, str) and os.path.isfile(path)):
+                return Pooling(path, device=deviceid, tokenizer=self.config.get("tokenizer"))
+
+            return MeanPooling(path, device=deviceid, tokenizer=self.config.get("tokenizer"))
 
         if not SENTENCE_TRANSFORMERS:
             raise ImportError('sentence-transformers is not available - install "similarity" extra to enable')
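
End to end, this lets an Embeddings index run on an exported ONNX encoder, with the tokenizer supplied through config; a sketch (model id and file name hypothetical):

```python
from txtai.embeddings import Embeddings
from txtai.pipeline import HFOnnx

# Export a pooling model to a local ONNX file
onnx = HFOnnx()
onnx("sentence-transformers/bert-base-nli-mean-tokens", "pooling", "embeddings.onnx", quantize=True)

# A local ONNX path routes to Pooling; tokenizer comes from config
embeddings = Embeddings({"path": "embeddings.onnx", "tokenizer": "sentence-transformers/bert-base-nli-mean-tokens"})
print(embeddings.similarity("feel good story", ["Maine man wins $1M from $25 lottery ticket"]))
```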
22 changes: 22 additions & 0 deletions src/python/txtai/vectors/words.py
@@ -153,6 +153,28 @@ def lookup(self, tokens):
 
         return self.model.query(tokens)
 
+    @staticmethod
+    def isDatabase(path):
+        """
+        Checks if this is a SQLite database file which is the file format used for word vectors databases.
+
+        Args:
+            path: path to check
+
+        Returns:
+            True if this is a SQLite database
+        """
+
+        if isinstance(path, str) and os.path.isfile(path) and os.path.getsize(path) >= 100:
+            # Read 100 byte SQLite header
+            with open(path, "rb") as f:
+                header = f.read(100)
+
+            # Check for SQLite header
+            return header.startswith(b"SQLite format 3\000")
+
+        return False
+
     @staticmethod
     def build(data, size, mincount, path):
         """
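
The check sniffs the 16-byte magic string that opens SQLite's 100-byte file header, the on-disk format pymagnitude uses; a quick sketch (file names hypothetical):

```python
from txtai.vectors.words import WordVectors

WordVectors.isDatabase("vectors.magnitude")  # True for a pymagnitude SQLite file
WordVectors.isDatabase("embeddings.onnx")    # False, ONNX files are not SQLite
WordVectors.isDatabase(b"model bytes")       # False, only string paths are checked
```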