From 3510a447d97f40881665a87ca6c39ca3462b2a32 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Thu, 2 Feb 2023 15:20:49 +0000 Subject: [PATCH] Use row lengths instead of offsets in convert_df_to_dict (#285) --- merlin/systems/triton/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/merlin/systems/triton/__init__.py b/merlin/systems/triton/__init__.py index d1f4993f7..4a8ec0d8c 100644 --- a/merlin/systems/triton/__init__.py +++ b/merlin/systems/triton/__init__.py @@ -71,7 +71,9 @@ def _convert_df_to_dict(schema, batch, dtype="int32"): df_dict[col_name + "__values"] = col.list.leaves.values_host.astype( col_schema.dtype.to_numpy ) - df_dict[col_name + "__lengths"] = col._column.offsets.values_host.astype(dtype) + offsets = col._column.offsets.values_host.astype(dtype) + row_lengths = offsets[1:] - offsets[:-1] + df_dict[col_name + "__lengths"] = row_lengths else: values = col.list.leaves.values_host values = values.reshape(*shape).astype(col_schema.dtype.to_numpy)