From 9c8f216a512b2df0855f09fe84681ab52354c494 Mon Sep 17 00:00:00 2001 From: Julio Perez <37191411+jperez999@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:24:17 -0400 Subject: [PATCH] fix categorify c++ op not working with new tensor conversions (#370) * fix categorify c++ op not working with new tensor conversions * fix spacing format --- merlin/dag/executors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/merlin/dag/executors.py b/merlin/dag/executors.py index de24923c4..fcb8c9535 100644 --- a/merlin/dag/executors.py +++ b/merlin/dag/executors.py @@ -31,6 +31,7 @@ ) from merlin.dag import ColumnSelector, DataFormats, Graph, Node from merlin.dag.ops.stat_operator import StatOperator +from merlin.dag.utils import group_values_offsets from merlin.dtypes.shape import DefaultShapes from merlin.io import Dataset from merlin.io.worker import clean_worker_cache @@ -119,7 +120,11 @@ def _execute_node(self, node, transformable, capture_dtypes=False, strict=False) upstream_columns = self._append_addl_root_columns(node, transformable, upstream_outputs) formatted_columns = self._standardize_formats(node, upstream_columns) transform_input = self._merge_upstream_columns(formatted_columns) + if "CategorifyTransform" in str(node.op): + transform_input = group_values_offsets(transform_input) transform_output = self._run_node_transform(node, transform_input, capture_dtypes, strict) + if "CategorifyTransform" in str(node.op): + transform_output = TensorTable(transform_output) transform_output = _convert_format(transform_output, self.target_format) return transform_output