Test #16827

Closed
wants to merge 44 commits

Commits: 44 total (changes shown from 29 commits)
28bf38e
working conftest
galipremsagar Sep 4, 2024
a2e25e2
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 9, 2024
a6b3de5
enable logging
galipremsagar Sep 9, 2024
0206872
test
galipremsagar Sep 9, 2024
f4364f8
test
galipremsagar Sep 10, 2024
77731bb
Merge branch 'branch-24.10' into gpu_cpu_metrics
galipremsagar Sep 10, 2024
c6a44a1
test
galipremsagar Sep 10, 2024
80f628b
Merge branch 'gpu_cpu_metrics' of https://github.com/galipremsagar/cu…
galipremsagar Sep 10, 2024
2cc6e0d
test
galipremsagar Sep 10, 2024
ab5ba4e
test
galipremsagar Sep 11, 2024
3b7d740
test
galipremsagar Sep 11, 2024
695bf30
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 11, 2024
264a444
test
galipremsagar Sep 12, 2024
337cef8
test
galipremsagar Sep 12, 2024
5efca92
test
galipremsagar Sep 12, 2024
5e6ec98
test
galipremsagar Sep 13, 2024
2200ec2
test
galipremsagar Sep 14, 2024
3ac06df
Merge branch 'branch-24.10' into gpu_cpu_metrics
galipremsagar Sep 14, 2024
1b7b5a9
test
galipremsagar Sep 14, 2024
3702b4c
Merge branch 'gpu_cpu_metrics' of https://github.com/galipremsagar/cu…
galipremsagar Sep 14, 2024
b0e4955
test
galipremsagar Sep 15, 2024
d2344dc
test
galipremsagar Sep 15, 2024
32d3a30
cleanup
galipremsagar Sep 15, 2024
84c58e1
update and cleanup
galipremsagar Sep 16, 2024
c4f4cbf
revert
galipremsagar Sep 16, 2024
23545de
cleanup
galipremsagar Sep 16, 2024
cf4a3f4
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 16, 2024
ed2bea6
Merge remote-tracking branch 'upstream/branch-24.10' into gpu_cpu_met…
galipremsagar Sep 17, 2024
124cd80
test
galipremsagar Sep 18, 2024
35d6a3a
Update python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
galipremsagar Sep 18, 2024
bc0fc18
test
galipremsagar Sep 18, 2024
91b03c6
test
galipremsagar Sep 18, 2024
1f1f77c
Merge branch 'test' of https://github.com/galipremsagar/cudf into test
galipremsagar Sep 18, 2024
d959ada
Merge branch 'branch-24.10' into test
galipremsagar Sep 18, 2024
0404129
Merge remote-tracking branch 'upstream/branch-24.10' into test
galipremsagar Sep 18, 2024
63358cc
Update summarize-test-results.py
galipremsagar Sep 18, 2024
3b9ed5b
Merge branch 'branch-24.10' into test
galipremsagar Sep 18, 2024
13890fd
test
galipremsagar Sep 18, 2024
f8f86bd
test
galipremsagar Sep 18, 2024
44afa9d
Merge branch 'branch-24.10' into test
galipremsagar Sep 18, 2024
db1bf0b
test
galipremsagar Sep 18, 2024
bee4727
test
galipremsagar Sep 18, 2024
fe2611c
test
galipremsagar Sep 18, 2024
b4ce6ad
Update python/cudf/cudf/pandas/scripts/conftest-patch.py
galipremsagar Sep 18, 2024
16 changes: 15 additions & 1 deletion ci/cudf_pandas_scripts/pandas-tests/job-summary.py
@@ -68,8 +68,20 @@ def emoji_failed(x):
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
diff_df = pr_df - main_df
pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/(pr_df['_slow_function_call'] + pr_df['_fast_function_call']))*100.0).round(1)
pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/(pr_df['_slow_function_call'] + pr_df['_fast_function_call']))*100.0).round(1)

pr_df = pr_df[["total", "passed", "failed", "skipped"]]
cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)

# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns
pr_df['CPU Usage'] = pr_df['CPU Usage'].astype(str) + '%'
pr_df['GPU Usage'] = pr_df['GPU Usage'].astype(str) + '%'

pr_df['CPU Usage'] = pr_df['CPU Usage'].replace('nan%', '0%')
pr_df['GPU Usage'] = pr_df['GPU Usage'].replace('nan%', '0%')

pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
diff_df = diff_df[["total", "passed", "failed", "skipped"]]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
@@ -95,6 +107,8 @@ def emoji_failed(x):

print(comment)
print()
print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
print()
print("Here are the results of running the Pandas tests against this PR:")
print()
print(df.to_markdown())
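
For illustration, a minimal standalone sketch of the usage computation this hunk adds, using invented per-module counts; the real `_slow_function_call`/`_fast_function_call` fields come from summarize-test-results.py further down:

import pandas as pd

pr_results = {
    "tests/frame/test_arithmetic.py": {"total": 10, "passed": 9, "failed": 1, "skipped": 0,
                                       "_fast_function_call": 80, "_slow_function_call": 20},
    "tests/series/test_api.py": {"total": 5, "passed": 5, "failed": 0, "skipped": 0,
                                 "_fast_function_call": 0, "_slow_function_call": 0},
}
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
calls = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
pr_df["CPU Usage"] = (pr_df["_slow_function_call"] / calls * 100.0).round(1)
pr_df["GPU Usage"] = (pr_df["_fast_function_call"] / calls * 100.0).round(1)
# Modules with no recorded calls divide 0/0 and produce NaN, which the script
# above renders as "0%" via the 'nan%' -> '0%' replacement.
print(pr_df["CPU Usage"].mean().round(2), pr_df["GPU Usage"].mean().round(2))  # 20.0 80.0
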
2 changes: 1 addition & 1 deletion ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -33,7 +33,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
-m "not slow" \
--max-worker-restart=3 \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
--dist worksteal \
--dist loadfile \
--report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

SUMMARY_FILE_NAME=${PANDAS_TESTS_BRANCH}-${RAPIDS_FULL_VERSION}-results.json
16 changes: 16 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -881,6 +881,20 @@ def _assert_fast_slow_eq(left, right):
assert_eq(left, right)


def _fast_function_call():
"""
Placeholder fast function for pytest profiling purposes.
"""
return None


def _slow_function_call():
"""
Placeholder slow function for pytest profiling purposes.
"""
return None


def _fast_slow_function_call(
func: Callable,
/,
@@ -910,6 +924,7 @@ def _fast_slow_function_call(
# try slow path
raise Exception()
fast = True
_fast_function_call()
if _env_get_bool("CUDF_PANDAS_DEBUGGING", False):
try:
with nvtx.annotate(
@@ -952,6 +967,7 @@ def _fast_slow_function_call(
from ._logger import log_fallback

log_fallback(slow_args, slow_kwargs, err)
_slow_function_call()
with disable_module_accelerator():
result = func(*slow_args, **slow_kwargs)
return _maybe_wrap_result(result, func, *args, **kwargs), fast
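
A rough sketch of why these no-op markers are useful: a sys.setprofile hook (the mechanism used by the conftest patch below) sees a "call" event for each of them and can count how often the fast (GPU) path versus the slow (pandas fallback) path was taken. The function names mirror the diff; the driver code here is invented:

import sys
from collections import defaultdict

counts = defaultdict(int)
TRACKED = {"_fast_function_call", "_slow_function_call"}

def _fast_function_call():
    return None

def _slow_function_call():
    return None

def _trace(frame, event, arg):
    # Count only "call" events for the two marker functions.
    if event == "call" and frame.f_code.co_name in TRACKED:
        counts[frame.f_code.co_name] += 1

sys.setprofile(_trace)
_fast_function_call()
_fast_function_call()
_slow_function_call()
sys.setprofile(None)
print(dict(counts))  # {'_fast_function_call': 2, '_slow_function_call': 1}
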
96 changes: 95 additions & 1 deletion python/cudf/cudf/pandas/scripts/conftest-patch.py
@@ -1,10 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import contextlib
import json
import multiprocessing
import os
import sys
from collections import defaultdict
from functools import wraps

import pytest
@@ -36,4 +39,95 @@ def patch_testing_functions():
pytest.raises = replace_kwargs({"match": None})(pytest.raises)


# Dictionary to store function call counts
manager = multiprocessing.Manager()
function_call_counts = defaultdict(int) # type: ignore

# The specific function to track
FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"}


def find_pytest_file(frame):
new_f = frame
while new_f:
if "pandas-testing/pandas-tests/tests" in new_f.f_globals.get(
"__file__", ""
):
return os.path.abspath(new_f.f_globals.get("__file__", ""))
new_f = new_f.f_back
return None


def trace_calls(frame, event, arg):
if event != "call":
return
code = frame.f_code
func_name = code.co_name

if func_name in FUNCTION_NAME:
# filename = find_pytest_file(frame)
# if filename not in function_call_counts:
# function_call_counts[filename] = defaultdict(int)
# function_call_counts[filename][func_name] += 1
function_call_counts[func_name] += 1


def pytest_sessionstart(session):
# Set the profile function to trace calls
sys.setprofile(trace_calls)


def pytest_sessionfinish(session, exitstatus):
# Remove the profile function
sys.setprofile(None)


@pytest.hookimpl(tryfirst=True)
def pytest_runtest_setup(item):
# Check if this is the first test in the file
if item.nodeid.split("::")[0] != getattr(
pytest_runtest_setup, "current_file", None
):
# If it's a new file, reset the function call counts
global function_call_counts
function_call_counts = defaultdict(int)
pytest_runtest_setup.current_file = item.nodeid.split("::")[0]


@pytest.hookimpl(trylast=True)
def pytest_runtest_teardown(item, nextitem):
# Check if this is the last test in the file
if (
nextitem is None
or nextitem.nodeid.split("::")[0] != item.nodeid.split("::")[0]
):
# Write the function call counts to a file
worker_id = os.getenv("PYTEST_XDIST_WORKER", "master")
output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics.json'
# if os.path.exists(output_file):
# output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics_1.json'
with open(output_file, "w") as f:
json.dump(dict(function_call_counts), f, indent=4)
print(f"Function call counts have been written to {output_file}")


@pytest.hookimpl(tryfirst=True)
def pytest_configure(config):
if hasattr(config, "workerinput"):
# Running in xdist worker
global function_call_counts
function_call_counts = defaultdict(int)


@pytest.hookimpl(trylast=True)
def pytest_unconfigure(config):
if hasattr(config, "workerinput"):
# Running in xdist worker
worker_id = config.workerinput["workerid"]
output_file = f"function_call_counts_worker_{worker_id}.json"
with open(output_file, "w") as f:
json.dump(dict(function_call_counts), f, indent=4)
# print(f"Function call counts have been written to {output_file}")


sys.path.append(os.path.dirname(__file__))
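
For context, a hypothetical walk-through of the artifact written by pytest_runtest_teardown above; the node id, worker id, and counts are invented:

nodeid = "pandas-tests/tests/frame/test_arithmetic.py::test_add"
worker_id = "gw3"  # value of PYTEST_XDIST_WORKER for this worker
module = nodeid.split("::")[0]
output_file = f'{module.replace("/", "__")}_{worker_id}_metrics.json'
print(output_file)
# pandas-tests__tests__frame__test_arithmetic.py_gw3_metrics.json
# The JSON body is a flat mapping of marker name to call count, e.g.:
# {"_fast_function_call": 1423, "_slow_function_call": 97}
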
8 changes: 4 additions & 4 deletions python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
@@ -65,7 +65,7 @@ markers = [
]
EOF
# append the contents of patch-confest.py to conftest.py
cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py
# cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py

# Substitute `pandas.tests` with a relative import.
# This will depend on the location of the test module relative to
@@ -134,10 +134,11 @@ TEST_THAT_CRASH_PYTEST_WORKERS="not test_bitmasks_pyarrow \
and not test_large_string_pyarrow \
and not test_interchange_from_corrected_buffer_dtypes \
and not test_eof_states \
and not test_array_tz"
and not test_array_tz \
and not test_groupby_raises_category"

# TODO: Remove "not db" once a postgres & mysql container is set up on the CI
PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \
PANDAS_CI="1" timeout 90m python -m pytest -p cudf.pandas \
-v -m "not single_cpu and not db" \
-k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and not test_groupby_raises_category_on_category and not test_constructor_no_pandas_array and not test_is_monotonic_na and not test_index_contains and not test_index_contains and not test_frame_op_subclass_nonclass_constructor and not test_round_trip_current" \
--import-mode=importlib \
@@ -146,5 +147,4 @@ PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \

mv *.json ..
cd ..

rm -rf pandas-testing/pandas-tests/
20 changes: 20 additions & 0 deletions python/cudf/cudf/pandas/scripts/summarize-test-results.py
@@ -12,7 +12,9 @@
"""

import argparse
import glob
import json
import os

from rich.console import Console
from rich.table import Table
@@ -57,6 +59,24 @@ def get_per_module_results(log_file_name):
per_module_results[module_name].setdefault(outcome, 0)
per_module_results[module_name]["total"] += 1
per_module_results[module_name][outcome] += 1

for key, value in per_module_results.items():
processed_name = key.replace("/", "__") + "_*_metrics.json"
# Assuming the directory is the same as the module name's directory
directory = os.path.dirname(log_file_name)
pattern = os.path.join(directory, processed_name)
matching_files = glob.glob(pattern)
for file in matching_files:
with open(file) as f:
function_call_counts = json.load(f)
per_module_results[key]["_slow_function_call"] = (
per_module_results[key].get("_slow_function_call", 0)
+ function_call_counts.get("_slow_function_call", 0)
)
per_module_results[key]["_fast_function_call"] = (
per_module_results[key].get("_fast_function_call", 0)
+ function_call_counts.get("_fast_function_call", 0)
)
return per_module_results
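
A small sketch of the glob pattern built in the loop above, with an invented log path and module key, to show which per-worker metrics files get folded into the per-module counts:

import glob
import os

log_file_name = "results/branch-24.10.json"   # invented path
key = "tests/frame/test_arithmetic.py"        # a per-module key from the report log
pattern = os.path.join(os.path.dirname(log_file_name),
                       key.replace("/", "__") + "_*_metrics.json")
print(pattern)
# results/tests__frame__test_arithmetic.py_*_metrics.json
# Each matching worker file is read and its _slow/_fast counts are summed into
# per_module_results[key] before the totals reach job-summary.py.
matching_files = glob.glob(pattern)
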

