diff --git a/src/main/cpp/src/SparkResourceAdaptorJni.cpp b/src/main/cpp/src/SparkResourceAdaptorJni.cpp
index e09ef0dfdb..a1ca8eedf3 100644
--- a/src/main/cpp/src/SparkResourceAdaptorJni.cpp
+++ b/src/main/cpp/src/SparkResourceAdaptorJni.cpp
@@ -300,7 +300,6 @@ class full_thread_state {
   // time)
   long time_retry_running_nanos = 0;
   std::chrono::time_point<std::chrono::steady_clock> block_start;
-  long gpu_memory_allocated_bytes = 0;
 
   // metrics for the current thread
   task_metrics metrics;
@@ -891,6 +890,8 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource {
   std::condition_variable task_has_woken_condition;
   std::map<long, full_thread_state> threads;
   std::map<long, std::set<long>> task_to_threads;
+  long gpu_memory_allocated_bytes = 0;
+
   // Metrics are a little complicated. Spark reports metrics at a task level
   // but we track and collect them at a thread level. The life time of a thread
   // and a task are not tied to each other, and a thread can work on things for
@@ -1376,10 +1377,10 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource {
         // num_bytes is likely not padded, which could cause slight inaccuracies
         // but for now it shouldn't matter for watermark purposes
         if (!is_for_cpu) {
-          thread->second.gpu_memory_allocated_bytes += num_bytes;
+          gpu_memory_allocated_bytes += num_bytes;
           thread->second.metrics.gpu_max_memory_allocated =
             std::max(thread->second.metrics.gpu_max_memory_allocated,
-                     thread->second.gpu_memory_allocated_bytes);
+                     gpu_memory_allocated_bytes);
         }
         break;
       default: break;
@@ -1780,7 +1781,7 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource {
   auto const thread = threads.find(tid);
   if (thread != threads.end()) {
     log_status("DEALLOC", tid, thread->second.task_id, thread->second.state);
-    if (!is_for_cpu) { thread->second.gpu_memory_allocated_bytes -= num_bytes; }
+    if (!is_for_cpu) { gpu_memory_allocated_bytes -= num_bytes; }
   } else {
     log_status("DEALLOC", tid, -2, thread_state::UNKNOWN);
   }
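
The net effect of the diff is that gpu_memory_allocated_bytes is no longer a per-full_thread_state counter but a single counter on the spark_resource_adaptor, so each thread's gpu_max_memory_allocated watermark now reflects the total GPU memory live across all threads at the moment that thread allocates. The following is a minimal standalone sketch of that accounting pattern, not the spark-rapids-jni code itself; memory_tracker and its members are hypothetical names for illustration, and the explicit mutex stands in for the adaptor's existing state lock.

    // Sketch only: one shared allocated-bytes counter, per-thread peak metrics.
    #include <algorithm>
    #include <iostream>
    #include <mutex>
    #include <unordered_map>

    struct thread_metrics {
      long gpu_max_memory_allocated = 0;  // peak device memory seen by this thread
    };

    class memory_tracker {
     public:
      void on_alloc(long tid, long num_bytes)
      {
        std::lock_guard<std::mutex> lock(mutex_);
        // One counter for the whole tracker, not one per thread, so the
        // watermark reflects total device memory in use across all threads.
        gpu_memory_allocated_bytes_ += num_bytes;
        auto& m = metrics_[tid];
        m.gpu_max_memory_allocated =
          std::max(m.gpu_max_memory_allocated, gpu_memory_allocated_bytes_);
      }

      void on_dealloc(long num_bytes)
      {
        std::lock_guard<std::mutex> lock(mutex_);
        gpu_memory_allocated_bytes_ -= num_bytes;
      }

      long peak_for(long tid)
      {
        std::lock_guard<std::mutex> lock(mutex_);
        return metrics_[tid].gpu_max_memory_allocated;
      }

     private:
      std::mutex mutex_;
      long gpu_memory_allocated_bytes_ = 0;
      std::unordered_map<long, thread_metrics> metrics_;
    };

    int main()
    {
      memory_tracker tracker;
      tracker.on_alloc(/*tid=*/1, 100);  // thread 1 holds 100 bytes
      tracker.on_alloc(/*tid=*/2, 50);   // thread 2 allocates while thread 1 still holds 100
      tracker.on_dealloc(50);
      // Thread 2's watermark includes thread 1's live allocation (150); with the
      // old per-thread counter it would only have seen its own 50 bytes.
      std::cout << tracker.peak_for(2) << "\n";  // prints 150
      return 0;
    }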