
Commit

test cudf.
trivialfis committed Mar 2, 2023
1 parent bda0f4e commit 7b06c19
Showing 3 changed files with 82 additions and 52 deletions.
71 changes: 71 additions & 0 deletions python-package/xgboost/testing/ranking.py
@@ -0,0 +1,71 @@
"""Tests for learning to rank."""
from types import ModuleType
from typing import Any

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm


def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
"""Test ranking with qid packed into X."""
import scipy.sparse
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedGroupKFold, cross_val_score

X, y, q, w = tm.make_ltr(n_samples=128, n_features=2, n_query_groups=8, max_rel=3)

# pack qid into x using dataframe
df = impl.DataFrame(X)
df["qid"] = q
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method)
ranker.fit(df, y)
s = ranker.score(df, y)
assert s > 0.7

# works with validation datasets as well
valid_df = df.copy()
valid_df.iloc[0, 0] = 3.0
ranker.fit(df, y, eval_set=[(valid_df, y)])

# same as passing qid directly
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method)
ranker.fit(X, y, qid=q)
s1 = ranker.score(df, y)
assert np.isclose(s, s1)

# Works with standard sklearn cv
if tree_method != "gpu_hist":
# we need cuML for this.
kfold = StratifiedGroupKFold(shuffle=False)
results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid)
assert len(results) == 5

# Works with custom metric
def neg_mse(*args: Any, **kwargs: Any) -> float:
return -mean_squared_error(*args, **kwargs)

ranker = xgb.XGBRanker(n_estimators=3, eval_metric=neg_mse, tree_method=tree_method)
ranker.fit(df, y, eval_set=[(valid_df, y)])
score = ranker.score(valid_df, y)
assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1])

# Works with sparse data
if tree_method != "gpu_hist":
# no sparse with cuDF
X_csr = scipy.sparse.csr_matrix(X)
df = impl.DataFrame.sparse.from_spmatrix(
X_csr, columns=[str(i) for i in range(X.shape[1])]
)
df["qid"] = q
ranker = xgb.XGBRanker(
n_estimators=3, eval_metric="ndcg", tree_method=tree_method
)
ranker.fit(df, y)
s2 = ranker.score(df, y)
assert np.isclose(s2, s)

with pytest.raises(ValueError, match="Either `group` or `qid`."):
ranker.fit(df, y, eval_set=[(X, y)])
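
For context, the two call sites added in this commit parameterise the new helper by DataFrame module and tree method. A minimal usage sketch (the cuDF call assumes a CUDA environment with cuDF installed; it is shown commented out):

import pandas as pd

from xgboost.testing.ranking import run_ranking_qid_df

# CPU path: pandas DataFrame with the "hist" tree method
# (the call added to tests/python/test_with_sklearn.py below).
run_ranking_qid_df(pd, "hist")

# GPU path: cuDF DataFrame with "gpu_hist"
# (the call added to tests/python-gpu/test_gpu_with_sklearn.py below).
# import cudf
# run_ranking_qid_df(cudf, "gpu_hist")

Passing the DataFrame implementation in as a module keeps the pandas and cuDF assertions identical, while the tree_method argument lets the helper skip the cross-validation and sparse-input branches on the gpu_hist path, per the in-code comments about cuML and cuDF.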
8 changes: 8 additions & 0 deletions tests/python-gpu/test_gpu_with_sklearn.py
@@ -5,6 +5,7 @@

import numpy as np
import pytest
from xgboost.testing.ranking import run_ranking_qid_df

import xgboost as xgb
from xgboost import testing as tm
@@ -153,3 +154,10 @@ def test_classififer():
y *= 10
with pytest.raises(ValueError, match=r"Invalid classes.*"):
clf.fit(X, y)


@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
import cudf

run_ranking_qid_df(cudf, "gpu_hist")
55 changes: 3 additions & 52 deletions tests/python/test_with_sklearn.py
@@ -3,11 +3,12 @@
import pickle
import random
import tempfile
from typing import Any, Callable, Optional
from typing import Callable, Optional

import numpy as np
import pytest
from sklearn.utils.estimator_checks import parametrize_with_checks
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore

@@ -183,58 +184,8 @@ def test_ranking_metric() -> None:
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
import pandas as pd
import scipy.sparse
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedGroupKFold, cross_val_score

X, y, q, w = tm.make_ltr(n_samples=128, n_features=2, n_query_groups=8, max_rel=3)

# pack qid into x using dataframe
df = pd.DataFrame(X)
df["qid"] = q
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg")
ranker.fit(df, y)
s = ranker.score(df, y)
assert s > 0.7

# works with validation datasets as well
valid_df = df.copy()
valid_df.iloc[0, 0] = 3.0
ranker.fit(df, y, eval_set=[(valid_df, y)])

# same as passing qid directly
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg")
ranker.fit(X, y, qid=q)
s1 = ranker.score(df, y)
assert np.isclose(s, s1)

# Works with standard sklearn cv
kfold = StratifiedGroupKFold(shuffle=False)
results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid)
assert len(results) == 5

# Works with custom metric
def neg_mse(*args: Any, **kwargs: Any) -> float:
return -mean_squared_error(*args, **kwargs)

ranker = xgb.XGBRanker(n_estimators=3, eval_metric=neg_mse)
ranker.fit(df, y, eval_set=[(valid_df, y)])
score = ranker.score(valid_df, y)
assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1])

# Works with sparse data
X_csr = scipy.sparse.csr_matrix(X)
df = pd.DataFrame.sparse.from_spmatrix(
X_csr, columns=[str(i) for i in range(X.shape[1])]
)
df["qid"] = q
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg")
ranker.fit(df, y)
s2 = ranker.score(df, y)
assert np.isclose(s2, s)

with pytest.raises(ValueError, match="Either `group` or `qid`."):
ranker.fit(df, y, eval_set=[(X, y)])
run_ranking_qid_df(pd, "hist")


def test_stacking_regression():
