Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/rel-3.42.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
h2o-ops committed Aug 17, 2023
2 parents 45240cf + 3e1c5cc commit ac135bd
Show file tree
Hide file tree
Showing 23 changed files with 657 additions and 30 deletions.
8 changes: 0 additions & 8 deletions h2o-core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,6 @@ dependencies {

api "com.google.code.gson:gson:${gsonVersion}"
api 'commons-lang:commons-lang:2.6'

// Duke library: collection of String comparators
api('no.priv.garshol.duke:duke:1.2') {
exclude group: 'org.apache.lucene', module: 'lucene-core'
exclude group: 'org.apache.lucene', module: 'lucene-analyzers-common'
exclude group: 'org.apache.lucene', module: 'lucene-spatial'
exclude group: 'org.mapdb', module: 'mapdb'
}

testImplementation project(':h2o-test-support')
testRuntimeOnly project(":${defaultWebserverModule}")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package water.rapids.ast.prims.string;

import no.priv.garshol.duke.Comparator;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
Expand All @@ -11,6 +10,7 @@
import water.rapids.ast.AstPrimitive;
import water.rapids.ast.AstRoot;
import water.rapids.vals.ValFrame;
import water.util.comparison.string.StringComparator;
import water.util.comparison.string.StringComparatorFactory;

/**
Expand Down Expand Up @@ -73,7 +73,7 @@ private StringDistanceComparator(String measure, boolean compareEmpty) {
@Override
public void map(Chunk[] cs, NewChunk[] nc) {
BufferedString tmpStr = new BufferedString();
Comparator cmp = StringComparatorFactory.makeComparator(_measure);
StringComparator cmp = StringComparatorFactory.makeComparator(_measure);
int N = nc.length;
assert N * 2 == cs.length;
for (int i = 0; i < N; i++) {
Expand Down
35 changes: 35 additions & 0 deletions h2o-dist/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,35 @@
.pre {
white-space: pre;
}

.callout {
padding: 15px;
background-color: #eee;
color: #000;
position: relative;
border-left: solid 4px #999;
}

.callout--info {
padding-left: 55px;
border-left-color: #00a0cc;
background-color: rgb(0 150 200 / 0.15);
}

.callout--info::before {
content: '⚠';
font-size: 32px;
font-weight: 200;
font-family: 'Open Sans', sans-serif, system-ui;
color: #00a0cc;
position: absolute;
top: 4px;
left: 10px;
}

.callout--info a {
color: #00a0cc;
}

</style>
<script>
Expand Down Expand Up @@ -562,6 +591,7 @@ <h1><span>H<sub>2</sub>O</span></h1>
</div>
<h2>Get started with H<sub>2</sub>O in 3 easy steps</h2>
<p>1. Download H<sub>2</sub>O. This is a zip file that contains everything you need to get started.</p>
<p class="callout callout--info">By default, this setup is open. If you want to secure your installation, follow the <a href="https://docs.h2o.ai/h2o/latest-stable/h2o-docs/security.html" target="_blank">Security Guidelines.</a></p>
<p>2. From your terminal, run:</p>

<button class="btn copy_button" id="btnCopy1" data-clipboard-target='#to_copy1'></button>
Expand Down Expand Up @@ -589,6 +619,7 @@ <h2>Use H<sub>2</sub>O directly from Python</h2>
<p class="terminal" id="to_copy12">
pip install matplotlib<br/>
</p>
<p class="callout callout--info">By default, this setup is open. If you want to secure your installation, follow the <a href="https://docs.h2o.ai/h2o/latest-stable/h2o-docs/security.html" target="_blank">Security Guidelines.</a></p>
<p>At the command line, copy and paste these commands one line at a time:</p>
<button class="btn copy_button" id="btnCopy2" data-clipboard-target='#to_copy2'></button>
<p class="terminal" id="to_copy2">
Expand All @@ -608,6 +639,8 @@ <h2>Conda Installation</h2>
<div id="quickstart-r" style="display:none">
<h2>Use H<sub>2</sub>O directly from R</h2>

<p class="callout callout--info">By default, this setup is open. If you want to secure your installation, follow the <a href="https://docs.h2o.ai/h2o/latest-stable/h2o-docs/security.html" target="_blank">Security Guidelines.</a></p>

<p>Copy and paste these commands into R one line at a time:</p>
<button class="btn copy_button" id="btnCopy3" data-clipboard-target='#to_copy3'></button>
<p class="terminal" id="to_copy3" data-clipboard-target='to_copy3'>
Expand Down Expand Up @@ -817,6 +850,8 @@ <h2>User Documentation</h2>
<div id="user_documentation"></div>
<h2>Developer Documentation</h2>
<div id="developer_documentation"></div>
<h2>Security Documentation</h2>
<p><a href="https://docs.h2o.ai/h2o/latest-stable/h2o-docs/security.html" target="_blank"> Security Guidelines</a></p>
<h2>Booklets</h2>
<div id="booklets"></div>
</div>
Expand Down
13 changes: 12 additions & 1 deletion h2o-docs/src/product/downloading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ If you plan to exclusively use H2O's web GUI, `Flow <http://docs.h2o.ai/h2o/late

1. Click the ``Download H2O`` button on the `http://h2o-release.s3.amazonaws.com/h2o/latest_stable.html <http://h2o-release.s3.amazonaws.com/h2o/latest_stable.html>`__ page. This downloads a zip file that contains everything you need to get started.

.. note::

By default, this setup is open. Follow `security guidelines <security.html>`__ if you want to secure your installation.

2. From your terminal, unzip and start H2O as in the example below.

.. substitution-code-block:: bash
Expand All @@ -26,12 +30,15 @@ If you plan to exclusively use H2O's web GUI, `Flow <http://docs.h2o.ai/h2o/late

3. Point your browser to http://localhost:54321 to open up the H2O Flow web GUI.


Install in R
------------

Perform the following steps in R to install H2O. Copy and paste these commands one line at a time.

.. note::

By default, this setup is open. Follow `security guidelines <security.html>`__ if you want to secure your installation.

1. The following two commands remove any previously installed H2O packages for R.

.. code-block:: r
Expand Down Expand Up @@ -70,6 +77,10 @@ Alternatively you can install H2O’s R package from `CRAN <https://cran.r-proje
Install in Python
-----------------

.. note::

By default, this setup is open. Follow `security guidelines <security.html>`__ if you want to secure your installation.

Run the following commands in a Terminal window to install H2O for Python.

1. Install dependencies (prepending with ``sudo`` if needed):
Expand Down
5 changes: 4 additions & 1 deletion h2o-docs/src/product/flow.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ Download Flow
.. substitution-code-block:: bash

curl -o h2o.zip http://download.h2o.ai/versions/h2o-|version|.zip


.. note::

By default, this setup is open. Follow `security guidelines <security.html>`__ if you want to secure your installation.

2. Next in your terminal, enter the following command lines one at a time. The first line changes into your Downloads folder, the second line unzips your zipfile, the third line changes into your h2o-3-|version| folder, and the fourth line runs your jar file.

Expand Down
1 change: 1 addition & 0 deletions h2o-docs/src/product/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Additional Resources:
- See how our customers are using H2O at https://www.h2o.ai/customers/.
- Keep up to date with the latest H2O blogs at https://www.h2o.ai/blog/.
- Review projects, applications, research papers, tutorials, courses, and books that use H2O at https://github.com/h2oai/awesome-h2o.
- Learn about securing your installation by following our `security guidelines <security.html>`__.

.. toctree::
:maxdepth: 2
Expand Down
4 changes: 4 additions & 0 deletions h2o-docs/src/product/quick-start-videos.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Quick Start Videos
==================

.. note::

By default, this setup is open. Follow `security guidelines <security.html>`__ if you want to secure your installation.


H2O Quick Start with Flow
-------------------------
Expand Down
2 changes: 1 addition & 1 deletion h2o-docs/src/product/starting-h2o.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ The flatfile contains a list of nodes in the form ``IP:PORT`` that are going to
Web Server
^^^^^^^^^^

The web server IP is auto-configured in the same way as internal communication IP, nevertheless the created socket listens on all available interfaces. A specific API can be specified with the ``-web_ip`` option.
By default, the web server IP is auto-configured in the same way as the internal communication IP; nevertheless, the created socket listens on all available interfaces. A specific IP can be specified with the ``-web_ip`` option.

Options
'''''''
Expand Down
12 changes: 12 additions & 0 deletions h2o-docs/src/product/welcome.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ learn more:

- `GitHub Help <https://help.github.com/>`_: The GitHub Help system is a useful resource for becoming familiar with Git.

.. note::

By default, this setup is open. Follow `security guidelines <security.html>`__ if you want to secure your installation.

Use Cases
~~~~~~~~~

Expand Down Expand Up @@ -165,6 +169,10 @@ At this point, determine whether you want to complete this quick start in either
Type 'q()' to quit R.
>

# By default, this setup is open.
# Follow our security guidelines (https://docs.h2o.ai/h2o/latest-stable/h2o-docs/security.html)
# if you want to secure your installation.

# Copy and paste the following commands in R to download dependency packages.
> pkgs <- c("methods", "statmod", "stats", "graphics", "RCurl", "jsonlite", "tools", "utils")
> for (pkg in pkgs) {if (! (pkg %in% rownames(installed.packages()))) { install.packages(pkg) }}
Expand Down Expand Up @@ -199,6 +207,10 @@ At this point, determine whether you want to complete this quick start in either

.. code-tab:: python

# By default, this setup is open.
# Follow our security guidelines (https://docs.h2o.ai/h2o/latest-stable/h2o-docs/security.html)
# if you want to secure your installation.

# Before starting Python, run the following commands to install dependencies.
# Prepend these commands with `sudo` only if necessary:
# h2o-3 user$ [sudo] pip install -U requests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import ai.h2o.mojos.runtime.frame.MojoFrameMeta;
import ai.h2o.mojos.runtime.transforms.MojoTransform;
import ai.h2o.mojos.runtime.transforms.MojoTransformBuilderFactory;
import no.priv.garshol.duke.Comparator;
import water.util.comparison.string.StringComparatorFactory;
import water.util.comparison.string.StringComparator;

import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -67,7 +67,7 @@ public static class Factory implements MojoTransformBuilderFactory {

private static final HashMap<String,StringPropertiesBinaryFunction> _supportedFunctions = new HashMap<String,StringPropertiesBinaryFunction>() {{
put("strDistance", new StringPropertiesBinaryFunction() {
Comparator _comparator = null;
StringComparator _comparator = null;

boolean _compareEmpty = false;

Expand Down
1 change: 0 additions & 1 deletion h2o-genmodel/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ dependencies {
api "ai.h2o:h2o-tree-api:0.3.17"
// dependencies that are shared with h2o-core - always use the same version as h2o-core
api "com.google.code.gson:gson:${gsonVersion}"
api "no.priv.garshol.duke:duke:1.2"
api "commons-lang:commons-lang:2.6"
api "joda-time:joda-time:2.10.13"
// test only
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package water.util.comparison.string;

/*
Copyright 2023 Lars Marius Garshol
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Original code: https://github.com/larsga/Duke/blob/duke-1.2/src/main/java/no/priv/garshol/duke/comparators/ExactComparator.java
/**
 * String comparator with an all-or-nothing outcome: two values either match
 * exactly or they do not match at all.
 */
public class ExactComparator implements StringComparator {

    /**
     * @return always {@code false}
     */
    public boolean isTokenized() {
        return false;
    }

    /**
     * Scores two strings by exact equality.
     *
     * @param v1 first value; {@code equals} is invoked on it, so it must not be null
     * @param v2 second value
     * @return {@code 1.0} when {@code v1.equals(v2)} holds, {@code 0.0} otherwise
     */
    public double compare(String v1, String v2) {
        if (v1.equals(v2)) {
            return 1.0;
        }
        return 0.0;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
*/
package water.util.comparison.string;

import no.priv.garshol.duke.Comparator;

import java.util.ArrayList;
import java.util.List;

Expand All @@ -20,7 +18,7 @@
* E. Yancey, RESEARCH REPORT SERIES (Statistics #2005-05), US Bureau
* of the Census. http://www.census.gov/srd/papers/pdf/rrs2005-05.pdf
*/
public class H2OJaroWinklerComparator implements Comparator {
public class H2OJaroWinklerComparator implements StringComparator {

public double compare(String s1, String s2) {
return similarity(s1, s2);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package water.util.comparison.string;

/*
Copyright 2023 Lars Marius Garshol
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Original code: https://github.com/larsga/Duke/blob/duke-1.2/src/main/java/no/priv/garshol/duke/comparators/JaccardIndexComparator.java
/**
 * Token-level similarity measure: both inputs are split into tokens and the
 * score is the ratio of an accumulated "intersection" to an accumulated
 * "union", where token-to-token similarity is delegated to a pluggable
 * sub-comparator (an {@link ExactComparator} unless replaced).
 */
public class JaccardIndexComparator implements StringComparator {
    /** Comparator used to score individual token pairs. */
    private StringComparator subcomp;

    /** Creates a comparator that scores token pairs with {@link ExactComparator}. */
    public JaccardIndexComparator() {
        this.subcomp = new ExactComparator();
    }

    /**
     * Replaces the comparator used to score individual token pairs.
     *
     * @param comp the new token-level comparator
     */
    public void setComparator(StringComparator comp) {
        this.subcomp = comp;
    }

    /**
     * @return always {@code true}
     */
    public boolean isTokenized() {
        return true;
    }

    /**
     * Compares two strings token by token.
     *
     * @param s1 first string; {@code equals} is invoked on it, so it must not be null
     * @param s2 second string
     * @return {@code 1.0} if the strings are equal; otherwise the accumulated
     *         intersection divided by the accumulated union
     */
    public double compare(String s1, String s2) {
        // Identical inputs short-circuit before any tokenization happens.
        if (s1.equals(s2)) {
            return 1.0;
        }

        String[] firstTokens = StringUtils.split(s1);
        String[] secondTokens = StringUtils.split(s2);

        // FIXME: we assume neither token array contains internal duplicates

        // Drive the outer loop from the array with fewer tokens (ties keep the
        // original order: the first array stays the "shorter" one).
        final boolean firstIsLonger = firstTokens.length > secondTokens.length;
        final String[] shorter = firstIsLonger ? secondTokens : firstTokens;
        final String[] longer = firstIsLonger ? firstTokens : secondTokens;

        double intersection = 0;
        double union = shorter.length + longer.length;
        for (String token : shorter) {
            // Best similarity this token achieves against any token on the other side.
            double best = 0;
            for (String candidate : longer) {
                best = Math.max(best, subcomp.compare(token, candidate));
            }

            intersection += best;
            union -= best; // matched weight would otherwise be counted on both sides
        }

        // NOTE(review): if both token arrays are empty this evaluates 0.0 / 0.0,
        // i.e. NaN -- confirm whether StringUtils.split can return empty arrays
        // for two non-equal inputs.
        return intersection / union;
    }
}

Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package water.util.comparison.string;

import no.priv.garshol.duke.Comparator;

import static org.apache.commons.lang.math.IEEE754rUtils.min;

/**
Expand All @@ -24,7 +22,7 @@
* limitations under the License.
* #L%
**/
public class LevenshteinDistanceComparator implements Comparator {
public class LevenshteinDistanceComparator implements StringComparator {

@Override
public boolean isTokenized() {
Expand Down
Loading

0 comments on commit ac135bd

Please sign in to comment.