Test #16827

Closed
wants to merge 44 commits

Commits: 44 total (changes shown from 29 commits)
28bf38e
working conftest
galipremsagar Sep 4, 2024
a2e25e2
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 9, 2024
a6b3de5
enable logging
galipremsagar Sep 9, 2024
0206872
test
galipremsagar Sep 9, 2024
f4364f8
test
galipremsagar Sep 10, 2024
77731bb
Merge branch 'branch-24.10' into gpu_cpu_metrics
galipremsagar Sep 10, 2024
c6a44a1
test
galipremsagar Sep 10, 2024
80f628b
Merge branch 'gpu_cpu_metrics' of https://github.com/galipremsagar/cu…
galipremsagar Sep 10, 2024
2cc6e0d
test
galipremsagar Sep 10, 2024
ab5ba4e
test
galipremsagar Sep 11, 2024
3b7d740
test
galipremsagar Sep 11, 2024
695bf30
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 11, 2024
264a444
test
galipremsagar Sep 12, 2024
337cef8
test
galipremsagar Sep 12, 2024
5efca92
test
galipremsagar Sep 12, 2024
5e6ec98
test
galipremsagar Sep 13, 2024
2200ec2
test
galipremsagar Sep 14, 2024
3ac06df
Merge branch 'branch-24.10' into gpu_cpu_metrics
galipremsagar Sep 14, 2024
1b7b5a9
test
galipremsagar Sep 14, 2024
3702b4c
Merge branch 'gpu_cpu_metrics' of https://github.com/galipremsagar/cu…
galipremsagar Sep 14, 2024
b0e4955
test
galipremsagar Sep 15, 2024
d2344dc
test
galipremsagar Sep 15, 2024
32d3a30
cleanup
galipremsagar Sep 15, 2024
84c58e1
update and cleanup
galipremsagar Sep 16, 2024
c4f4cbf
revert
galipremsagar Sep 16, 2024
23545de
cleanup
galipremsagar Sep 16, 2024
cf4a3f4
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 16, 2024
ed2bea6
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 17, 2024
124cd80
test
galipremsagar Sep 18, 2024
35d6a3a
Update python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
galipremsagar Sep 18, 2024
bc0fc18
test
galipremsagar Sep 18, 2024
91b03c6
test
galipremsagar Sep 18, 2024
1f1f77c
Merge branch 'test' of https://github.com/galipremsagar/cudf into test
galipremsagar Sep 18, 2024
d959ada
Merge branch 'branch-24.10' into test
galipremsagar Sep 18, 2024
0404129
Merge remote-tracking branch 'upstream/branch-24.10' into test
galipremsagar Sep 18, 2024
63358cc
Update summarize-test-results.py
galipremsagar Sep 18, 2024
3b9ed5b
Merge branch 'branch-24.10' into test
galipremsagar Sep 18, 2024
13890fd
test
galipremsagar Sep 18, 2024
f8f86bd
test
galipremsagar Sep 18, 2024
44afa9d
Merge branch 'branch-24.10' into test
galipremsagar Sep 18, 2024
db1bf0b
test
galipremsagar Sep 18, 2024
bee4727
test
galipremsagar Sep 18, 2024
fe2611c
test
galipremsagar Sep 18, 2024
b4ce6ad
Update python/cudf/cudf/pandas/scripts/conftest-patch.py
galipremsagar Sep 18, 2024
16 changes: 15 additions & 1 deletion ci/cudf_pandas_scripts/pandas-tests/job-summary.py
@@ -68,8 +68,20 @@ def emoji_failed(x):
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
diff_df = pr_df - main_df
pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/(pr_df['_slow_function_call'] + pr_df['_fast_function_call']))*100.0).round(1)
pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/(pr_df['_slow_function_call'] + pr_df['_fast_function_call']))*100.0).round(1)

pr_df = pr_df[["total", "passed", "failed", "skipped"]]
cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)

# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns
pr_df['CPU Usage'] = pr_df['CPU Usage'].astype(str) + '%'
pr_df['GPU Usage'] = pr_df['GPU Usage'].astype(str) + '%'

pr_df['CPU Usage'] = pr_df['CPU Usage'].replace('nan%', '0%')
pr_df['GPU Usage'] = pr_df['GPU Usage'].replace('nan%', '0%')

pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
diff_df = diff_df[["total", "passed", "failed", "skipped"]]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
@@ -95,6 +107,8 @@ def emoji_failed(x):

print(comment)
print()
print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
print()
print("Here are the results of running the Pandas tests against this PR:")
print()
print(df.to_markdown())
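
For illustration, a minimal standalone sketch of the usage computation this hunk adds, using invented per-module counts; the real `_slow_function_call`/`_fast_function_call` fields come from summarize-test-results.py further down:

import pandas as pd

pr_results = {
    "tests/frame/test_arithmetic.py": {"total": 10, "passed": 9, "failed": 1, "skipped": 0,
                                       "_fast_function_call": 80, "_slow_function_call": 20},
    "tests/series/test_api.py": {"total": 5, "passed": 5, "failed": 0, "skipped": 0,
                                 "_fast_function_call": 0, "_slow_function_call": 0},
}
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
calls = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
pr_df["CPU Usage"] = (pr_df["_slow_function_call"] / calls * 100.0).round(1)
pr_df["GPU Usage"] = (pr_df["_fast_function_call"] / calls * 100.0).round(1)
# Modules with no recorded calls divide 0/0 and produce NaN, which the script
# above renders as "0%" via the 'nan%' -> '0%' replacement.
print(pr_df["CPU Usage"].mean().round(2), pr_df["GPU Usage"].mean().round(2))  # 20.0 80.0
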
2 changes: 1 addition & 1 deletion ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -33,7 +33,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
-m "not slow" \
--max-worker-restart=3 \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
--dist worksteal \
--dist loadfile \
--report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

SUMMARY_FILE_NAME=${PANDAS_TESTS_BRANCH}-${RAPIDS_FULL_VERSION}-results.json
16 changes: 16 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -881,6 +881,20 @@ def _assert_fast_slow_eq(left, right):
assert_eq(left, right)


def _fast_function_call():
"""
Placeholder fast function for pytest profiling purposes.
"""
return None


def _slow_function_call():
"""
Placeholder slow function for pytest profiling purposes.
"""
return None


def _fast_slow_function_call(
func: Callable,
/,
@@ -910,6 +924,7 @@ def _fast_slow_function_call(
# try slow path
raise Exception()
fast = True
_fast_function_call()
if _env_get_bool("CUDF_PANDAS_DEBUGGING", False):
try:
with nvtx.annotate(
@@ -952,6 +967,7 @@ def _fast_slow_function_call(
from ._logger import log_fallback

log_fallback(slow_args, slow_kwargs, err)
_slow_function_call()
with disable_module_accelerator():
result = func(*slow_args, **slow_kwargs)
return _maybe_wrap_result(result, func, *args, **kwargs), fast
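
A rough sketch of why these no-op markers are useful: a sys.setprofile hook (the mechanism used by the conftest patch below) sees a "call" event for each of them and can count how often the fast (GPU) path versus the slow (pandas fallback) path was taken. The function names mirror the diff; the driver code here is invented:

import sys
from collections import defaultdict

counts = defaultdict(int)
TRACKED = {"_fast_function_call", "_slow_function_call"}

def _fast_function_call():
    return None

def _slow_function_call():
    return None

def _trace(frame, event, arg):
    # Count only "call" events for the two marker functions.
    if event == "call" and frame.f_code.co_name in TRACKED:
        counts[frame.f_code.co_name] += 1

sys.setprofile(_trace)
_fast_function_call()
_fast_function_call()
_slow_function_call()
sys.setprofile(None)
print(dict(counts))  # {'_fast_function_call': 2, '_slow_function_call': 1}
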
96 changes: 95 additions & 1 deletion python/cudf/cudf/pandas/scripts/conftest-patch.py
@@ -1,10 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import contextlib
import json
import multiprocessing
import os
import sys
from collections import defaultdict
from functools import wraps

import pytest
@@ -36,4 +39,95 @@ def patch_testing_functions():
pytest.raises = replace_kwargs({"match": None})(pytest.raises)


# Dictionary to store function call counts
manager = multiprocessing.Manager()
function_call_counts = defaultdict(int) # type: ignore

# The specific function to track
FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"}


def find_pytest_file(frame):
new_f = frame
while new_f:
if "pandas-testing/pandas-tests/tests" in new_f.f_globals.get(
"__file__", ""
):
return os.path.abspath(new_f.f_globals.get("__file__", ""))
new_f = new_f.f_back
return None


def trace_calls(frame, event, arg):
if event != "call":
return
code = frame.f_code
func_name = code.co_name

if func_name in FUNCTION_NAME:
# filename = find_pytest_file(frame)
# if filename not in function_call_counts:
# function_call_counts[filename] = defaultdict(int)
# function_call_counts[filename][func_name] += 1
function_call_counts[func_name] += 1


def pytest_sessionstart(session):
# Set the profile function to trace calls
sys.setprofile(trace_calls)


def pytest_sessionfinish(session, exitstatus):
# Remove the profile function
sys.setprofile(None)


@pytest.hookimpl(tryfirst=True)
def pytest_runtest_setup(item):
# Check if this is the first test in the file
if item.nodeid.split("::")[0] != getattr(
pytest_runtest_setup, "current_file", None
):
# If it's a new file, reset the function call counts
global function_call_counts
function_call_counts = defaultdict(int)
pytest_runtest_setup.current_file = item.nodeid.split("::")[0]


@pytest.hookimpl(trylast=True)
def pytest_runtest_teardown(item, nextitem):
# Check if this is the last test in the file
if (
nextitem is None
or nextitem.nodeid.split("::")[0] != item.nodeid.split("::")[0]
):
# Write the function call counts to a file
worker_id = os.getenv("PYTEST_XDIST_WORKER", "master")
output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics.json'
# if os.path.exists(output_file):
# output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics_1.json'
with open(output_file, "w") as f:
json.dump(dict(function_call_counts), f, indent=4)
print(f"Function call counts have been written to {output_file}")


@pytest.hookimpl(tryfirst=True)
def pytest_configure(config):
if hasattr(config, "workerinput"):
# Running in xdist worker
global function_call_counts
function_call_counts = defaultdict(int)


@pytest.hookimpl(trylast=True)
def pytest_unconfigure(config):
if hasattr(config, "workerinput"):
# Running in xdist worker
worker_id = config.workerinput["workerid"]
output_file = f"function_call_counts_worker_{worker_id}.json"
with open(output_file, "w") as f:
json.dump(dict(function_call_counts), f, indent=4)
# print(f"Function call counts have been written to {output_file}")


sys.path.append(os.path.dirname(__file__))
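
For context, a hypothetical walk-through of the artifact written by pytest_runtest_teardown above; the node id, worker id, and counts are invented:

nodeid = "pandas-tests/tests/frame/test_arithmetic.py::test_add"
worker_id = "gw3"  # value of PYTEST_XDIST_WORKER for this worker
module = nodeid.split("::")[0]
output_file = f'{module.replace("/", "__")}_{worker_id}_metrics.json'
print(output_file)
# pandas-tests__tests__frame__test_arithmetic.py_gw3_metrics.json
# The JSON body is a flat mapping of marker name to call count, e.g.:
# {"_fast_function_call": 1423, "_slow_function_call": 97}
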
8 changes: 4 additions & 4 deletions python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
@@ -65,7 +65,7 @@ markers = [
]
EOF
# append the contents of patch-confest.py to conftest.py
cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py
# cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py

# Substitute `pandas.tests` with a relative import.
# This will depend on the location of the test module relative to
@@ -134,10 +134,11 @@ TEST_THAT_CRASH_PYTEST_WORKERS="not test_bitmasks_pyarrow \
and not test_large_string_pyarrow \
and not test_interchange_from_corrected_buffer_dtypes \
and not test_eof_states \
and not test_array_tz"
and not test_array_tz \
and not test_groupby_raises_category"

# TODO: Remove "not db" once a postgres & mysql container is set up on the CI
PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \
PANDAS_CI="1" timeout 90m python -m pytest -p cudf.pandas \
-v -m "not single_cpu and not db" \
-k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and not test_groupby_raises_category_on_category and not test_constructor_no_pandas_array and not test_is_monotonic_na and not test_index_contains and not test_index_contains and not test_frame_op_subclass_nonclass_constructor and not test_round_trip_current" \
--import-mode=importlib \
@@ -146,5 +147,4 @@ PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \

mv *.json ..
cd ..

rm -rf pandas-testing/pandas-tests/
20 changes: 20 additions & 0 deletions python/cudf/cudf/pandas/scripts/summarize-test-results.py
@@ -12,7 +12,9 @@
"""

import argparse
import glob
import json
import os

from rich.console import Console
from rich.table import Table
@@ -57,6 +59,24 @@ def get_per_module_results(log_file_name):
per_module_results[module_name].setdefault(outcome, 0)
per_module_results[module_name]["total"] += 1
per_module_results[module_name][outcome] += 1

for key, value in per_module_results.items():
processed_name = key.replace("/", "__") + "_*_metrics.json"
# Assuming the directory is the same as the module name's directory
directory = os.path.dirname(log_file_name)
pattern = os.path.join(directory, processed_name)
matching_files = glob.glob(pattern)
for file in matching_files:
with open(file) as f:
function_call_counts = json.load(f)
per_module_results[key]["_slow_function_call"] = (
per_module_results[key].get("_slow_function_call", 0)
+ function_call_counts.get("_slow_function_call", 0)
)
per_module_results[key]["_fast_function_call"] = (
per_module_results[key].get("_fast_function_call", 0)
+ function_call_counts.get("_fast_function_call", 0)
)
return per_module_results
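
A small sketch of the glob pattern built in the loop above, with an invented log path and module key, to show which per-worker metrics files get folded into the per-module counts:

import glob
import os

log_file_name = "results/branch-24.10.json"   # invented path
key = "tests/frame/test_arithmetic.py"        # a per-module key from the report log
pattern = os.path.join(os.path.dirname(log_file_name),
                       key.replace("/", "__") + "_*_metrics.json")
print(pattern)
# results/tests__frame__test_arithmetic.py_*_metrics.json
# Each matching worker file is read and its _slow/_fast counts are summed into
# per_module_results[key] before the totals reach job-summary.py.
matching_files = glob.glob(pattern)
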

