diff --git a/src/c_api.cpp b/src/c_api.cpp index a389e8e47b1d..7e71a4a3417d 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -515,6 +515,7 @@ class Booster { out_indices, out_data, data_type, &is_data_float32, num_matrices); std::vector row_sizes(num_matrices * nrow); std::vector row_matrix_offsets(num_matrices * nrow); + std::vector matrix_offsets(num_matrices); int64_t row_vector_cnt = 0; for (int m = 0; m < num_matrices; ++m) { for (int64_t i = 0; i < static_cast(agg.size()); ++i) { @@ -529,6 +530,12 @@ class Booster { } row_vector_cnt++; } + if (m == 0) { + matrix_offsets[m] = 0; + } + if (m + 1 < num_matrices) { + matrix_offsets[m + 1] = static_cast(matrix_offsets[m] + row_matrix_offsets[row_vector_cnt - 1] + row_sizes[row_vector_cnt - 1]); + } } // copy vector results to output for each row int64_t indptr_index = 0; @@ -546,7 +553,7 @@ class Booster { OMP_LOOP_EX_BEGIN(); auto row_vector = agg[i]; int64_t row_start_index = matrix_start_index + i; - int64_t element_index = row_matrix_offsets[row_start_index]; + int64_t element_index = row_matrix_offsets[row_start_index] + matrix_offsets[m]; int64_t indptr_loop_index = indptr_index + i; for (auto it = row_vector[m].begin(); it != row_vector[m].end(); ++it) { (*out_indices)[element_index] = it->first; @@ -646,13 +653,16 @@ class Booster { } else { (reinterpret_cast(*out_col_ptr))[col_ptr_index] = last_column_start_index + last_column_size; } - if (m != 0) { - matrix_start_indices[m] = matrix_start_indices[m - 1] + - last_column_start_index + - last_column_size; + if (m + 1 < num_matrices) { + matrix_start_indices[m + 1] = matrix_start_indices[m] + last_column_start_index + last_column_size; } + col_ptr_index++; } + // Note: we parallelize across matrices instead of rows because of the column_counts[m][col_idx] increment inside the loop + OMP_INIT_EX(); + #pragma omp parallel for schedule(static) for (int m = 0; m < num_matrices; ++m) { + OMP_LOOP_EX_BEGIN(); for (int64_t i = 0; i < static_cast(agg.size()); ++i) { auto row_vector = agg[i]; for (auto it = row_vector[m].begin(); it != row_vector[m].end(); ++it) { @@ -671,7 +681,9 @@ class Booster { } } } + OMP_LOOP_EX_END(); } + OMP_THROW_EX(); out_len[0] = elements_size; out_len[1] = col_ptr_size; } diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index c3d2794e228b..13d59c726f08 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1034,6 +1034,51 @@ def test_contribs_sparse(self): # validate the values are the same np.testing.assert_allclose(contribs_csc.toarray(), contribs_dense) + def test_contribs_sparse_multiclass(self): + n_features = 20 + n_samples = 100 + n_labels = 4 + # generate CSR sparse dataset + X, y = make_multilabel_classification(n_samples=n_samples, + sparse=True, + n_features=n_features, + n_classes=1, + n_labels=n_labels) + y = y.flatten() + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) + params = { + 'objective': 'multiclass', + 'num_class': n_labels, + 'verbose': -1, + } + lgb_train = lgb.Dataset(X_train, y_train) + gbm = lgb.train(params, lgb_train, num_boost_round=20) + contribs_csr = gbm.predict(X_test, pred_contrib=True) + self.assertTrue(isinstance(contribs_csr, list)) + for perclass_contribs_csr in contribs_csr: + self.assertTrue(isspmatrix_csr(perclass_contribs_csr)) + # convert data to dense and get back same contribs + contribs_dense = gbm.predict(X_test.toarray(), pred_contrib=True) + # validate the values are the same + contribs_csr_array = np.swapaxes(np.array([sparse_array.todense() for sparse_array in contribs_csr]), 0, 1) + contribs_csr_arr_re = contribs_csr_array.reshape((contribs_csr_array.shape[0], + contribs_csr_array.shape[1] * contribs_csr_array.shape[2])) + np.testing.assert_allclose(contribs_csr_arr_re, contribs_dense) + contribs_dense_re = contribs_dense.reshape(contribs_csr_array.shape) + self.assertLess(np.linalg.norm(gbm.predict(X_test, raw_score=True) + - np.sum(contribs_dense_re, axis=2)), 1e-4) + # validate using CSC matrix + X_test_csc = X_test.tocsc() + contribs_csc = gbm.predict(X_test_csc, pred_contrib=True) + self.assertTrue(isinstance(contribs_csc, list)) + for perclass_contribs_csc in contribs_csc: + self.assertTrue(isspmatrix_csc(perclass_contribs_csc)) + # validate the values are the same + contribs_csc_array = np.swapaxes(np.array([sparse_array.todense() for sparse_array in contribs_csc]), 0, 1) + contribs_csc_array = contribs_csc_array.reshape((contribs_csc_array.shape[0], + contribs_csc_array.shape[1] * contribs_csc_array.shape[2])) + np.testing.assert_allclose(contribs_csc_array, contribs_dense) + @unittest.skipIf(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, 'not enough RAM') def test_int32_max_sparse_contribs(self): params = {