Apply ruff everywhere (notebooks and scripts) #17820

Merged · 6 commits · Jan 28, 2025
13 changes: 2 additions & 11 deletions .pre-commit-config.yaml

@@ -6,7 +6,7 @@ ci:
   autoupdate_branch: ""
   autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
   autoupdate_schedule: quarterly
-  skip: ["verify-alpha-spec", "nbqa-isort"]
+  skip: ["verify-alpha-spec"]
   submodules: false
 
 repos:
@@ -41,13 +41,6 @@ repos:
           "python/cudf_polars/cudf_polars",
           "python/dask_cudf/dask_cudf"]
         pass_filenames: false
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.9.1
-    hooks:
-      - id: nbqa-isort
-        # Use the cudf_kafka isort orderings in notebooks so that dask
-        # and RAPIDS packages have their own sections.
-        args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
   - repo: https://github.com/pre-commit/mirrors-clang-format
     rev: v16.0.6
     hooks:
@@ -153,13 +146,11 @@ repos:
             ^CHANGELOG.md$
           )
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.0
+    rev: v0.9.3
     hooks:
       - id: ruff
         args: ["--fix"]
-        files: python/.*$
       - id: ruff-format
-        files: python/.*$
   - repo: https://github.com/rapidsai/pre-commit-hooks
     rev: v0.4.0
     hooks:
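Two changes in this file do the real work: the hook bump from v0.8.0 to v0.9.3, and, more importantly, dropping the `files: python/.*$` filters so that `ruff` and `ruff-format` run on every file pre-commit hands them, including the scripts under ci/, cpp/scripts/, and docs/ that are reformatted below. Recent ruff releases also lint and format Jupyter notebooks natively, which is what makes the separate nbqa-isort hook removable. A minimal sketch of the pyproject.toml side, with an assumed line length rather than this repo's confirmed setting:

    # Hypothetical pyproject.toml fragment. On ruff versions that do not yet
    # pick up notebooks by default, `extend-include` opts them in explicitly;
    # the line-length value here is an illustrative assumption.
    [tool.ruff]
    line-length = 79
    extend-include = ["*.ipynb"]  # treat Jupyter notebooks like Python sources

With a setup like this, the same lint and format rules apply to scripts and notebooks, so no notebook-specific tooling is needed.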
26 changes: 18 additions & 8 deletions ci/cudf_pandas_scripts/fetch_pandas_versions.py

@@ -1,24 +1,34 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
+import argparse
+
 import requests
-from packaging.version import Version
 from packaging.specifiers import SpecifierSet
-import argparse
+from packaging.version import Version
+
 
 def get_pandas_versions(pandas_range):
     url = "https://pypi.org/pypi/pandas/json"
     response = requests.get(url)
     data = response.json()
-    versions = [Version(v) for v in data['releases']]
+    versions = [Version(v) for v in data["releases"]]
     specifier = SpecifierSet(pandas_range.lstrip("pandas"))
     matching_versions = [v for v in versions if v in specifier]
-    matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version)
+    matching_minors = sorted(
+        set(".".join((str(v.major), str(v.minor))) for v in matching_versions),
+        key=Version,
+    )
     return matching_minors
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Filter pandas versions by prefix.")
-    parser.add_argument("pandas_range", type=str, help="The version prefix to filter by.")
+    parser = argparse.ArgumentParser(
+        description="Filter pandas versions by prefix."
+    )
+    parser.add_argument(
+        "pandas_range", type=str, help="The version prefix to filter by."
+    )
     args = parser.parse_args()
 
     versions = get_pandas_versions(args.pandas_range)
-    print(','.join(versions))
+    print(",".join(versions))
30 changes: 22 additions & 8 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -68,17 +68,27 @@ def emoji_failed(x):
 pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
 main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
 total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"]
-main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1)
-main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1)
+main_df["CPU Usage"] = (
+    (main_df["_slow_function_call"] / total_usage) * 100.0
+).round(1)
+main_df["GPU Usage"] = (
+    (main_df["_fast_function_call"] / total_usage) * 100.0
+).round(1)
 
 total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
-pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1)
-pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1)
+pr_df["CPU Usage"] = (
+    (pr_df["_slow_function_call"] / total_usage) * 100.0
+).round(1)
+pr_df["GPU Usage"] = (
+    (pr_df["_fast_function_call"] / total_usage) * 100.0
+).round(1)
 
 cpu_usage_mean = pr_df["CPU Usage"].mean().round(2)
 gpu_usage_mean = pr_df["GPU Usage"].mean().round(2)
 
-gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean())
+gpu_usage_rate_change = abs(
+    pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean()
+)
 pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0)
 pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0)
 main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0)
@@ -92,8 +102,12 @@ def emoji_failed(x):
 pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%"
 pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%"
 
-pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
-diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
+pr_df = pr_df[
+    ["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
+]
+diff_df = diff_df[
+    ["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
+]
 diff_df.columns = diff_df.columns + "_diff"
 diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
 diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed)
34 changes: 18 additions & 16 deletions ci/utils/nbtestlog2junitxml.py

@@ -1,15 +1,16 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Generate a junit-xml file from parsing a nbtest log
 
 import re
-from xml.etree.ElementTree import Element, ElementTree
-from os import path
 import string
 from enum import Enum
+from os import path
+from xml.etree.ElementTree import Element, ElementTree
 
 startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$")
-skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
+skippingPatt = re.compile(
+    r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$"
+)
 exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$")
 folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$")
 timePatt = re.compile(r"^real\s+([\d\.ms]+)$")
@@ -37,12 +38,8 @@ def makeFailureElement(outputLines):
 
 
 def setFileNameAttr(attrDict, fileName):
-    attrDict.update(file=fileName,
-                    classname="",
-                    line="",
-                    name="",
-                    time=""
-    )
+    attrDict.update(file=fileName, classname="", line="", name="", time="")
 
 
 def setClassNameAttr(attrDict, className):
     attrDict["classname"] = className
@@ -76,11 +73,12 @@ def parseLog(logFile, testSuiteElement):
     testSuiteElement.attrib["timestamp"] = ""
 
     attrDict = {}
-    #setFileNameAttr(attrDict, logFile)
+    # setFileNameAttr(attrDict, logFile)
     setFileNameAttr(attrDict, "nbtest")
 
-    parserStateEnum = Enum("parserStateEnum",
-                           "newTest startingLine finishLine exitCode")
+    parserStateEnum = Enum(
+        "parserStateEnum", "newTest startingLine finishLine exitCode"
+    )
     parserState = parserStateEnum.newTest
 
     testOutput = ""
@@ -98,7 +96,9 @@ def parseLog(logFile, testSuiteElement):
                 setTimeAttr(attrDict, "0m0s")
                 skippedElement = makeTestCaseElement(attrDict)
                 message = m.group(3) or ""
-                skippedElement.append(Element("skipped", message=message, type=""))
+                skippedElement.append(
+                    Element("skipped", message=message, type="")
+                )
                 testSuiteElement.append(skippedElement)
                 incrNumAttr(testSuiteElement, "skipped")
                 incrNumAttr(testSuiteElement, "tests")
@@ -160,4 +160,6 @@ def parseLog(logFile, testSuiteElement):
     testSuiteElement = Element("testsuite", name="nbtest", hostname="")
     parseLog(sys.argv[1], testSuiteElement)
     testSuitesElement.append(testSuiteElement)
-    ElementTree(testSuitesElement).write(sys.argv[1]+".xml", xml_declaration=True)
+    ElementTree(testSuitesElement).write(
+        sys.argv[1] + ".xml", xml_declaration=True
+    )
23 changes: 14 additions & 9 deletions cpp/scripts/sort_ninja_log.py

@@ -4,13 +4,12 @@
 import argparse
 import os
 import re
-import sys
-import xml.etree.ElementTree as ET
 from pathlib import Path
-from xml.dom import minidom
 
 parser = argparse.ArgumentParser()
-parser.add_argument("log_file", type=str, default=".ninja_log", help=".ninja_log file")
+parser.add_argument(
+    "log_file", type=str, default=".ninja_log", help=".ninja_log file"
+)
 parser.add_argument(
     "--fmt",
     type=str,
@@ -146,8 +145,8 @@ def format_file_size(input_size):
 
 
 def replace_placeholder_patterns(input_string: str) -> str:
-    pattern = r'(_h_env_placehold)[_placehold]+'
-    return re.sub(pattern, r'\1...', input_string)
+    pattern = r"(_h_env_placehold)[_placehold]+"
+    return re.sub(pattern, r"\1...", input_string)
 
 
 # adjust name for display
@@ -262,7 +261,9 @@ def output_html(entries, sorted_list, cmp_entries, args):
 
     # output detail table in build-time descending order
    print("<table id='detail' bgcolor='#EEEEEE'>")
-    print("<tr><th>File</th>", "<th>Compile time</th>", "<th>Size</th>", sep="")
+    print(
+        "<tr><th>File</th>", "<th>Compile time</th>", "<th>Size</th>", sep=""
+    )
     if cmp_entries:
         print("<th>t-cmp</th>", sep="")
     print("</tr>")
@@ -282,7 +283,9 @@ def output_html(entries, sorted_list, cmp_entries, args):
         print("<td align='right'>", build_time_str, "</td>", sep="", end="")
         print("<td align='right'>", file_size_str, "</td>", sep="", end="")
         # output diff column
-        cmp_entry = cmp_entries[name] if cmp_entries and name in cmp_entries else None
+        cmp_entry = (
+            cmp_entries[name] if cmp_entries and name in cmp_entries else None
+        )
         if cmp_entry:
             diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
             diff_time_str = format_build_time(diff_time)
@@ -347,7 +350,9 @@ def output_csv(entries, sorted_list, cmp_entries, args):
         entry = entries[name]
         build_time = entry[1] - entry[0]
         file_size = entry[2]
-        cmp_entry = cmp_entries[name] if cmp_entries and name in cmp_entries else None
+        cmp_entry = (
+            cmp_entries[name] if cmp_entries and name in cmp_entries else None
+        )
         print(build_time, file_size, name, sep=",", end="")
         if cmp_entry:
             diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
27 changes: 17 additions & 10 deletions docs/cudf/source/_ext/PandasCompat.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION
+# Copyright (c) 2021-2025, NVIDIA CORPORATION
 
 # This file is adapted from official sphinx tutorial for `todo` extension:
 # https://www.sphinx-doc.org/en/master/development/tutorials/todo.html
@@ -7,7 +7,6 @@
 from typing import cast
 
 from docutils import nodes
-from docutils.nodes import Element
 from docutils.parsers.rst import Directive
 from docutils.parsers.rst.directives.admonitions import BaseAdmonition
 from sphinx import addnodes
@@ -39,7 +38,6 @@ def run(self):
 
 
 class PandasCompatDirective(BaseAdmonition, SphinxDirective):
-
     # this enables content in the directive
     has_content = True
 
@@ -119,18 +117,24 @@ def __init__(self, app, doctree, docname):
         self.builder = app.builder
         self.config = app.config
         self.env = app.env
-        self.domain = cast(PandasCompatDomain, app.env.get_domain("pandascompat"))
+        self.domain = cast(
+            PandasCompatDomain, app.env.get_domain("pandascompat")
+        )
         self.document = new_document("")
         self.process(doctree, docname)
 
     def process(self, doctree: nodes.document, docname: str) -> None:
-        pandascompats = [v for vals in self.domain.pandascompats.values() for v in vals]
+        pandascompats = [
+            v for vals in self.domain.pandascompats.values() for v in vals
+        ]
         for node in doctree.findall(PandasCompatList):
             if not self.config.include_pandas_compat:
                 node.parent.remove(node)
                 continue
 
-            content: list[Element | None] = [nodes.target()] if node.get("ids") else []
+            content: list[nodes.Element | None] = (
+                [nodes.target()] if node.get("ids") else []
+            )
 
             for pandascompat in pandascompats:
                 # Create a copy of the pandascompat node
@@ -149,13 +153,16 @@ def create_reference(self, pandascompat, docname):
         para = nodes.paragraph()
         newnode = nodes.reference("", "")
         innernode = nodes.emphasis(
-            get_translation_sphinx("[source]"), get_translation_sphinx("[source]")
+            get_translation_sphinx("[source]"),
+            get_translation_sphinx("[source]"),
         )
         newnode["refdocname"] = pandascompat["docname"]
         try:
-            newnode["refuri"] = self.builder.get_relative_uri(
-                docname, pandascompat["docname"]
-            ) + "#" + pandascompat["target"]["refid"]
+            newnode["refuri"] = (
+                self.builder.get_relative_uri(docname, pandascompat["docname"])
+                + "#"
+                + pandascompat["target"]["refid"]
+            )
         except NoUri:
             # ignore if no URI can be determined, e.g. for LaTeX output
             pass
13 changes: 7 additions & 6 deletions docs/cudf/source/conf.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # cudf documentation build configuration file, created by
 # sphinx-quickstart on Wed May 3 10:59:22 2017.
@@ -36,7 +36,7 @@
 from pygments.token import Text as PText
 from sphinx.addnodes import pending_xref
 from sphinx.ext import intersphinx
-from sphinx.ext.autodoc import ClassDocumenter, bool_option
+from sphinx.ext.autodoc import ClassDocumenter
 from sphinx.highlighting import lexers
 
 
@@ -694,15 +694,16 @@ def add_content(self, more_content) -> None:
         enum_object: IntEnum = self.object
 
         if self.object.__name__ != "Kind":
-            self.add_line(f"See also :cpp:enum:`cudf::{self.object.__name__}`.", source_name)
+            self.add_line(
+                f"See also :cpp:enum:`cudf::{self.object.__name__}`.",
+                source_name,
+            )
             self.add_line("", source_name)
         self.add_line("Enum members", source_name)
         self.add_line("", source_name)
 
         for the_member_name in enum_object.__members__:  # type: ignore[attr-defined]
-            self.add_line(
-                f"* ``{the_member_name}``", source_name
-            )
+            self.add_line(f"* ``{the_member_name}``", source_name)
             self.add_line("", source_name)
3 changes: 1 addition & 2 deletions docs/cudf/source/user_guide/10min.ipynb
Review comment from the PR author (Contributor):

    In #17461, @wence-, were you trying to say you are hesitant to apply ruff without a config for sorting cudf into a separate section? I am okay with the diff shown here, but if you're not, I wanted to let you opine. We could use an import section config of some kind, but I find it unimportant in notebooks (only within the library internals).

Reply from @wence- (Contributor):

    I just meant that we didn't port the isort config for notebooks over. I am happy with any consistent sorting!
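The exchange above leaves the door open to restoring the old sectioned import order under ruff. If that were ever wanted, ruff's built-in isort rules can express custom sections without nbqa. A minimal sketch, assuming hypothetical section names and package lists, not the configuration this PR adopts:

    # Hypothetical pyproject.toml fragment: custom import sections via ruff's
    # isort rules. The package lists are illustrative, not taken from this PR.
    [tool.ruff.lint.isort]
    section-order = [
        "future",
        "standard-library",
        "third-party",
        "dask",          # custom section, defined below
        "rapids",        # custom section, defined below
        "first-party",
        "local-folder",
    ]

    [tool.ruff.lint.isort.sections]
    # Map each custom section name to the packages that belong in it.
    dask = ["dask", "distributed"]
    rapids = ["rmm", "cudf", "dask_cudf"]

With a config like this, `ruff check --fix` would group dask and RAPIDS imports into their own blocks in notebooks and scripts alike; without it, they simply sort as third-party packages, as the notebook diff below shows.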

@@ -37,12 +37,11 @@
    "source": [
     "import os\n",
     "\n",
+    "import cudf\n",
     "import cupy as cp\n",
     "import dask_cudf\n",
     "import pandas as pd\n",
     "\n",
-    "import cudf\n",
-    "\n",
     "cp.random.seed(12)\n",
     "\n",
     "#### Portions of this were borrowed and adapted from the\n",