Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve merge conflict [nocheck] #15708

Merged
merged 4 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion h2o-algos/src/main/java/hex/tree/gbm/GBMModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.util.FrameUtils;
import water.util.Log;
import water.util.TwoDimTable;

import java.util.*;
Expand Down Expand Up @@ -287,6 +288,7 @@ public FeatureInteractions getFeatureInteractions(int maxInteractionDepth, int m
for (int j = 0; j < nclasses; j++) {
FeatureInteractions currentTreeFeatureInteractions = new FeatureInteractions();
SharedTreeSubgraph tree = this.getSharedTreeSubgraph(i, j);

List<SharedTreeNode> interactionPath = new ArrayList<>();
Set<String> memo = new HashSet<>();

Expand All @@ -295,7 +297,10 @@ public FeatureInteractions getFeatureInteractions(int maxInteractionDepth, int m
featureInteractions.mergeWith(currentTreeFeatureInteractions);
}
}

if(featureInteractions.isEmpty()){
Log.warn("There is no feature interaction for this model.");
return null;
}
return featureInteractions;
}

Expand Down
23 changes: 16 additions & 7 deletions h2o-core/src/main/java/hex/FeatureInteractions.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ public void mergeWith(FeatureInteractions featureInteractions) {
}
}
}

/**
 * Reports whether this collection currently holds no feature interactions.
 *
 * @return {@code true} when the entry set is empty, {@code false} otherwise
 */
public boolean isEmpty() {
    return this.entrySet().isEmpty();
}

/**
 * Finds the largest {@code depth} among the stored feature interactions.
 *
 * @return the maximum depth over all entries, or {@code 0} when there are no entries
 */
public int maxDepth() {
    return this.entrySet()
            .stream()
            .mapToInt(entry -> entry.getValue().depth)
            .max()
            .orElse(0); // an empty collection has depth 0 by convention
}

public TwoDimTable[] getAsTable() {
if(isEmpty()) return null;
int maxDepth = maxDepth();
TwoDimTable[] twoDimTables = new TwoDimTable[maxDepth + 1];
for (int depth = 0; depth < maxDepth + 1; depth++) {
Expand All @@ -55,23 +61,24 @@ public TwoDimTable[] getAsTable() {
return twoDimTables;
}

List<FeatureInteraction> getFeatureInteractionsOfDepth(int depthRequired) {
private List<FeatureInteraction> getFeatureInteractionsOfDepth(int depthRequired) {
return this.entrySet()
.stream()
.filter(entry -> entry.getValue().depth == depthRequired)
.map(Map.Entry::getValue)
.collect(Collectors.toList());
}

List<FeatureInteraction> getFeatureInteractionsWithLeafStatistics() {
private List<FeatureInteraction> getFeatureInteractionsWithLeafStatistics() {
return this.entrySet()
.stream()
.filter(entry -> entry.getValue().hasLeafStatistics == true)
.map(Map.Entry::getValue)
.collect(Collectors.toList());
}

TwoDimTable constructFeatureInteractionsTable(int depth) {
private TwoDimTable constructFeatureInteractionsTable(int depth) {
assert depth >= 0 : "Depth has to be >= 0.";
String[] colHeaders = new String[] {"Interaction", "Gain", "FScore", "wFScore", "Average wFScore", "Average Gain",
"Expected Gain", "Gain Rank", "FScore Rank", "wFScore Rank", "Avg wFScore Rank", "Avg Gain Rank",
"Expected Gain Rank", "Average Rank", "Average Tree Index", "Average Tree Depth"};
Expand Down Expand Up @@ -134,7 +141,7 @@ TwoDimTable constructFeatureInteractionsTable(int depth) {
return table;
}

int indexOfInteractionWithName(String name, List<FeatureInteraction> featureInteractions) {
private int indexOfInteractionWithName(String name, List<FeatureInteraction> featureInteractions) {
for (int i = 0; i < featureInteractions.size(); i++)
if (featureInteractions.get(i).name == name)
return i;
Expand Down Expand Up @@ -182,7 +189,7 @@ public TwoDimTable[] getSplitValueHistograms() {
return splitValueHistograms;
}

TwoDimTable constructHistogramForFeatureInteraction(FeatureInteraction featureInteraction) {
private TwoDimTable constructHistogramForFeatureInteraction(FeatureInteraction featureInteraction) {
String[] colHeaders = new String[] {"Split Value", "Count"};
String[] colTypes = new String[] {"double", "int"};
String[] colFormat = new String[] {"%.5f", "%d"};
Expand Down Expand Up @@ -297,11 +304,13 @@ public static void collectFeatureInteractions(SharedTreeNode node, List<SharedTr
}

/**
 * Bundles the tables describing a set of feature interactions into one array.
 * <ul>
 *   <li>index 0 - result of {@code getAsTable()}</li>
 *   <li>index 1 - single-element array holding {@code getLeafStatisticsTable()}</li>
 *   <li>index 2 - result of {@code getSplitValueHistograms()}</li>
 * </ul>
 *
 * @param featureInteractions interactions to convert; may be {@code null}
 * @return the three table groups, or {@code null} when {@code featureInteractions}
 *         is {@code null} or contains no interactions
 */
public static TwoDimTable[][] getFeatureInteractionsTable(FeatureInteractions featureInteractions) {
    // getAsTable() yields null for an empty collection, which would leave table[0] null
    // and break callers that read table[0].length; treat "empty" the same as "absent".
    if (featureInteractions == null || featureInteractions.isEmpty()) {
        return null;
    }
    TwoDimTable[][] table = new TwoDimTable[3][];
    table[0] = featureInteractions.getAsTable();
    table[1] = new TwoDimTable[]{featureInteractions.getLeafStatisticsTable()};
    table[2] = featureInteractions.getSplitValueHistograms();
    return table;
}
}
5 changes: 3 additions & 2 deletions h2o-core/src/main/java/water/api/ModelsHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ public FeatureInteractionV3 makeFeatureInteraction(int version, FeatureInteracti
Model model = getFromDKV("key", s.model_id.key());
if (model instanceof FeatureInteractionsCollector) {
TwoDimTable[][] featureInteractions = ((FeatureInteractionsCollector) model).getFeatureInteractionsTable(s.max_interaction_depth, s.max_tree_depth, s.max_deepening);

if(featureInteractions == null){
return s;
}
s.feature_interaction = new TwoDimTableV3[featureInteractions[0].length + featureInteractions[2].length + 1];

for (int i = 0; i < featureInteractions[0].length; i++) {
Expand All @@ -189,7 +191,6 @@ public FeatureInteractionV3 makeFeatureInteraction(int version, FeatureInteracti
for (int i = 0; i < featureInteractions[2].length; i++) {
s.feature_interaction[i + featureInteractions[0].length + 1] = new TwoDimTableV3().fillFromImpl(featureInteractions[2][i]);
}

return s;
} else {
throw H2O.unimpl(String.format("%s does not support feature interactions calculation", model._parms.fullName()));
Expand Down
7 changes: 7 additions & 0 deletions h2o-core/src/test/java/hex/FeatureInteractionsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -224,5 +224,12 @@ public void testGetFeatureInteractionsTable() {
expected.add("AA|CC");
assertTrue(vars.containsAll(expected));
}

/**
 * An empty {@code FeatureInteractions} must report "no table" by returning {@code null}
 * from {@code getAsTable()}.
 */
@Test
public void testNoInteractions() {
    FeatureInteractions featureInteractions = new FeatureInteractions();
    TwoDimTable[] table = featureInteractions.getAsTable();
    // Use a JUnit assertion: the Java `assert` keyword is skipped unless the JVM runs with -ea,
    // which would let this test pass without checking anything.
    assertTrue(table == null);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
``custom_metric_func``
----------------------

- Available in: GBM, DRF, Deeplearning, Stacked Ensembles
- Available in: GBM, DRF, Deeplearning, Stacked Ensembles, GLM
- Hyperparameter: no

Description
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
``upload_custom_metric``
------------------------

- Available in: GBM, DRF, Deeplearning
- Available in: GBM, DRF, Deeplearning, GLM
- Hyperparameter: no

Description
Expand Down
4 changes: 4 additions & 0 deletions h2o-docs/src/product/data-science/glm.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ Algorithm-specific parameters

- **calc_like**: Specify whether to return likelihood function value for HGLM or normal GLM. Setting this option to ``True`` while disabling ``HGLM`` will enable the calculation of the full log likelihood and full AIC. This option defaults to ``False`` (disabled).

- `custom_metric_func <algo-params/custom_metric_func.html>`__: Specify a custom evaluation function.

- **dispersion_epsilon**: If the change in the dispersion parameter estimate or in the loglikelihood value is smaller than ``dispersion_epsilon``, the maximum likelihood dispersion parameter estimation loop stops. This option defaults to ``0.0001``.

- **dispersion_learning_rate**: (Applicable only when ``dispersion_parameter_method="ml"``) This value controls how much the dispersion parameter estimate will be changed when the calculated loglikelihood actually decreases with the new dispersion. In this case, instead of setting *dispersion = dispersion + change*, it is *dispersion + dispersion_learning_rate* :math:`\times` *change*. This option must be > 0 and defaults to ``0.5``.
Expand All @@ -65,6 +67,8 @@ Algorithm-specific parameters

- `rand_family <algo-params/rand_family.html>`__: The Random Component Family specified as an array. You must include one family for each random component. Currently only ``rand_family=["gaussian"]`` is supported.

- `upload_custom_metric <algo-params/upload_custom_metric.html>`__: Upload a custom metric into a running H2O cluster.

HGLM parameters
'''''''''''''''

Expand Down
3 changes: 2 additions & 1 deletion h2o-docs/src/product/getting-data-into-h2o.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The first step toward building and scoring your models is getting your data into
Supported File Formats
----------------------

H2O currently supports the following file types:
H2O supports the following file types:

- CSV (delimited, UTF-8 only) files (including GZipped CSV)
- ORC
Expand All @@ -18,6 +18,7 @@ H2O currently supports the following file types:
- XLSX (BIFF 8 only)
- Avro version 1.8.0 (without multifile parsing or column type modification)
- Parquet
- Google Storage (gs://)

**Notes**:

Expand Down
1 change: 1 addition & 0 deletions h2o-py/docs/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ The following data sources are supported:
* A Directory (with many data files inside at the *same* level -- no support for recursive import of data)
* S3/S3N
* Native Language Data Structure (c.f. the subsequent section)
* Google Storage (gs://)

.. code-block:: python

Expand Down
2 changes: 1 addition & 1 deletion h2o-py/h2o/h2o.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def import_file(path=None, destination_frame=None, parse=True, header=0, sep=Non
na_strings=None, pattern=None, skipped_columns=None, custom_non_data_line_markers=None,
partition_by=None, quotechar=None, escapechar=None):
"""
Import a dataset that is already on the cluster.
Import files into an H2O cluster. The default behavior is to pass-through to the parse phase automatically.

The path to the data must be a valid path for each node in the H2O cluster. If some node in the H2O cluster
cannot see the file, then an exception will be thrown by the H2O cluster. Does a parallel/distributed
Expand Down
11 changes: 7 additions & 4 deletions h2o-py/h2o/model/model_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,13 @@ def feature_interaction(self, max_interaction_depth=100, max_tree_depth=100, max
# For now, redirect to h2o.model.extensions.feature_interaction for models that support the feature, and print legacy message for others..
# Later, the method will be exposed only for models supporting the feature.
if has_extension(self, 'FeatureInteraction'):
return self._feature_interaction(max_interaction_depth=max_interaction_depth,
max_tree_depth=max_tree_depth,
max_deepening=max_deepening,
path=path)
table = self._feature_interaction(max_interaction_depth=max_interaction_depth,
max_tree_depth=max_tree_depth,
max_deepening=max_deepening,
path=path)
if table is None:
print("There is no feature interaction for this model.")
return table
print("No calculation available for this model")

def h(self, frame, variables):
Expand Down
5 changes: 4 additions & 1 deletion h2o-r/h2o-package/R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -3225,7 +3225,10 @@ h2o.feature_interaction <- function(model, max_interaction_depth = 100, max_tree

json <- .h2o.doSafePOST(urlSuffix = "FeatureInteraction", parms=parms)
source <- .h2o.fromJSON(jsonlite::fromJSON(json,simplifyDataFrame=FALSE))

if(is.null(source$feature_interaction)){
warning(paste0("There is no feature interaction for this model."))
return(NULL)
}
return(source$feature_interaction)
} else {
warning(paste0("No calculation available for this model"))
Expand Down
29 changes: 29 additions & 0 deletions h2o-r/tests/testdir_algos/gbm/runit_GBM_feature_interaction_cv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source("../../../scripts/h2o-r-test-setup.R")

library(ggplot2)

# Trains a GBM with a fold column on the ggplot2 diamonds data and checks that
# feature interaction works for the main model and returns NULL for a
# cross-validation model that has no feature interaction.
test.feature_interaction_with_cv <- function() {
  diamonds <- ggplot2::diamonds
  # Convert the ordered factors to plain (unordered) factors before upload.
  for (col in c("cut", "color", "clarity")) {
    diamonds[[col]] <- factor(diamonds[[col]], ordered = FALSE)
  }
  diamonds <- as.h2o(diamonds)
  diamonds$expensive <- h2o.asfactor(ifelse(diamonds$price == 5000, 1, 0))

  train <- diamonds
  train$fold <- h2o.kfold_column(data = train, nfolds = 3, seed = 123)

  gbm_args <- list(
    x = setdiff(names(diamonds), "expensive"),
    y = "expensive",
    fold_column = "fold",
    training_frame = as.name("train"),
    validation_frame = NULL,
    distribution = "bernoulli",
    learn_rate = 0.1,
    ntrees = 500,
    min_split_improvement = 1e-3,
    stopping_rounds = 3,
    stopping_tolerance = 0.001,
    seed = 456
  )
  main_model <- do.call(what = "h2o.gbm", args = gbm_args)

  # feature interaction with main model
  print(h2o.feature_interaction(model = main_model))

  # feature interaction with cv model where tree depth = 0
  cv_model_name <- main_model@model$cross_validation_models[[1]]$name
  cv_model <- h2o.getModel(cv_model_name)
  cv_interaction <- h2o.feature_interaction(model = cv_model)
  expect_true(is.null(cv_interaction))
}

doTest("Test feature interaction with CV enabled", test.feature_interaction_with_cv)