From fb7f3d5616fd2bdaeb3b8c1ba5cad51ea757769b Mon Sep 17 00:00:00 2001 From: zhilong Date: Mon, 2 Dec 2024 21:40:37 -0500 Subject: [PATCH] Use _PANDAS_SIZE_BYTES_MAX_SAMPLE_COUNT as the sample cap in pandas_block Signed-off-by: zhilong --- python/ray/data/_internal/pandas_block.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ray/data/_internal/pandas_block.py b/python/ray/data/_internal/pandas_block.py index e181c9ac8aa2..a68b9e962737 100644 --- a/python/ray/data/_internal/pandas_block.py +++ b/python/ray/data/_internal/pandas_block.py @@ -356,7 +356,7 @@ def get_deep_size(obj): # TensorDtype for ray.air.util.tensor_extensions.pandas.TensorDtype object_need_check = (TensorDtype,) - min_sample_size = _PANDAS_SIZE_BYTES_MIN_COUNT + max_sample_count = _PANDAS_SIZE_BYTES_MAX_SAMPLE_COUNT # Handle object columns separately for column in self._table.columns: @@ -366,8 +366,8 @@ def get_deep_size(obj): ): total_size = len(self._table[column]) - # Determine the sample size based on min_count - sample_size = min(total_size, min_sample_size) + # Determine the sample size based on max_sample_count + sample_size = min(total_size, max_sample_count) # Following codes can also handel case that sample_size == total_size sampled_data = self._table[column].sample(n=sample_size).values