diff --git a/cpc/test/CMakeLists.txt b/cpc/test/CMakeLists.txt index 2d3af9bb..645ce12f 100644 --- a/cpc/test/CMakeLists.txt +++ b/cpc/test/CMakeLists.txt @@ -49,3 +49,10 @@ target_sources(cpc_test cpc_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(cpc_test + PRIVATE + cpc_sketch_serialize_for_java.cpp +) +endif() diff --git a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp index deaff5e1..70027e37 100644 --- a/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +++ b/cpc/test/cpc_sketch_deserialize_from_java_test.cpp @@ -39,4 +39,22 @@ TEST_CASE("cpc sketch", "[serde_compat]") { } } +TEST_CASE("cpc sketch negative one", "[serde_compat]") { + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "cpc_negative_one_java.sk", std::ios::binary); + auto sketch = cpc_sketch::deserialize(is); + REQUIRE_FALSE(sketch.is_empty()); + REQUIRE(sketch.get_estimate() == Approx(1).margin(0.01)); + sketch.update((uint64_t) -1); + sketch.update((int64_t) -1); + sketch.update((uint32_t) -1); + sketch.update((int32_t) -1); + sketch.update((uint16_t) -1); + sketch.update((int16_t) -1); + sketch.update((uint8_t) -1); + sketch.update((int8_t) -1); + REQUIRE(sketch.get_estimate() == Approx(1).margin(0.01)); +} + } /* namespace datasketches */ diff --git a/cpc/test/cpc_sketch_serialize_for_java.cpp b/cpc/test/cpc_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..d46fa047 --- /dev/null +++ b/cpc/test/cpc_sketch_serialize_for_java.cpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("cpc sketch generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 100, 200, 2000, 20000}; + for (const unsigned n: n_arr) { + cpc_sketch sketch; + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); + std::ofstream os("cpc_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +} /* namespace datasketches */ diff --git a/cpc/test/cpc_sketch_test.cpp b/cpc/test/cpc_sketch_test.cpp index f45cbc15..fd9bcf37 100644 --- a/cpc/test/cpc_sketch_test.cpp +++ b/cpc/test/cpc_sketch_test.cpp @@ -88,9 +88,6 @@ TEST_CASE("cpc sketch: serialize deserialize empty", "[cpc_sketch]") { REQUIRE(deserialized.is_empty() == sketch.is_empty()); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::ofstream os("cpc-empty.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serialize deserialize sparse", "[cpc_sketch]") { @@ -108,9 +105,6 @@ TEST_CASE("cpc sketch: serialize deserialize sparse", "[cpc_sketch]") { for (int i = 0; i < n; i++) deserialized.update(i); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::ofstream os("cpc-sparse.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serialize deserialize hybrid", "[cpc_sketch]") { @@ -128,9 +122,6 @@ TEST_CASE("cpc sketch: serialize deserialize hybrid", "[cpc_sketch]") { for (int 
i = 0; i < n; i++) deserialized.update(i); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::ofstream os("cpc-hybrid.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serialize deserialize pinned", "[cpc_sketch]") { @@ -148,9 +139,6 @@ TEST_CASE("cpc sketch: serialize deserialize pinned", "[cpc_sketch]") { for (int i = 0; i < n; i++) deserialized.update(i); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::ofstream os("cpc-pinned.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serialize deserialize sliding", "[cpc_sketch]") { @@ -168,9 +156,6 @@ TEST_CASE("cpc sketch: serialize deserialize sliding", "[cpc_sketch]") { for (int i = 0; i < n; i++) deserialized.update(i); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::ofstream os("cpc-sliding.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serializing deserialize sliding large", "[cpc_sketch]") { @@ -188,9 +173,6 @@ TEST_CASE("cpc sketch: serializing deserialize sliding large", "[cpc_sketch]") { for (int i = 0; i < n; i++) deserialized.update(i); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::ofstream os("cpc-sliding-large.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serialize deserialize empty, bytes", "[cpc_sketch]") { @@ -201,9 +183,6 @@ TEST_CASE("cpc sketch: serialize deserialize empty, bytes", "[cpc_sketch]") { REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range); - - std::ofstream os("cpc-empty.bin"); - sketch.serialize(os); } TEST_CASE("cpc sketch: serialize deserialize sparse, bytes", "[cpc_sketch]") { @@ -261,8 +240,6 @@ TEST_CASE("cpc sketch: serialize deserialize pinned, bytes", "[cpc_sketch]") { for (int i = 0; i 
< n; i++) deserialized.update(i); REQUIRE(deserialized.get_estimate() == sketch.get_estimate()); REQUIRE(deserialized.validate()); - - std::cout << sketch.to_string(); } TEST_CASE("cpc sketch: serialize deserialize sliding, bytes", "[cpc_sketch]") { @@ -380,8 +357,6 @@ TEST_CASE("cpc sketch: update int equivalence", "[cpc_sketch]") { sketch.update((uint8_t) -1); sketch.update((int8_t) -1); REQUIRE(sketch.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11)); - std::ofstream os("cpc-negative-one.bin"); // to compare with Java - sketch.serialize(os); } TEST_CASE("cpc sketch: update float equivalence", "[cpc_sketch]") { diff --git a/fi/test/CMakeLists.txt b/fi/test/CMakeLists.txt index fc8780d2..0279b03f 100644 --- a/fi/test/CMakeLists.txt +++ b/fi/test/CMakeLists.txt @@ -48,3 +48,10 @@ target_sources(fi_test frequent_items_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(fi_test + PRIVATE + frequent_items_sketch_serialize_for_java.cpp +) +endif() diff --git a/fi/test/frequent_items_sketch_serialize_for_java.cpp b/fi/test/frequent_items_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..4b99b309 --- /dev/null +++ b/fi/test/frequent_items_sketch_serialize_for_java.cpp @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("frequent longs sketch generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + frequent_items_sketch sketch(6); + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + REQUIRE(sketch.is_empty() == (n == 0)); + if (n > 10) { + REQUIRE(sketch.get_maximum_error() > 0); + } else { + REQUIRE(sketch.get_maximum_error() == 0); + } + REQUIRE(sketch.get_total_weight() == n); + std::ofstream os("frequent_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +TEST_CASE("frequent strings sketch generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + frequent_items_sketch sketch(6); + for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i)); + REQUIRE(sketch.is_empty() == (n == 0)); + if (n > 10) { + REQUIRE(sketch.get_maximum_error() > 0); + } else { + REQUIRE(sketch.get_maximum_error() == 0); + } + REQUIRE(sketch.get_total_weight() == n); + std::ofstream os("frequent_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +TEST_CASE("frequent strings sketch ascii", "[serialize_for_java]") { + frequent_items_sketch sketch(6); + sketch.update("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1); + sketch.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 2); + sketch.update("ccccccccccccccccccccccccccccc", 3); + sketch.update("ddddddddddddddddddddddddddddd", 4); + std::ofstream os("frequent_string_ascii_cpp.sk", std::ios::binary); + sketch.serialize(os); +} + +TEST_CASE("frequent strings sketch utf8", "[serialize_for_java]") { + frequent_items_sketch sketch(6); + sketch.update("абвгд", 1); + sketch.update("еёжзи", 2); + sketch.update("йклмн", 3); + 
sketch.update("опрст", 4); + sketch.update("уфхцч", 5); + sketch.update("шщъыь", 6); + sketch.update("эюя", 7); + std::ofstream os("frequent_string_utf8_cpp.sk", std::ios::binary); + sketch.serialize(os); +} + +} /* namespace datasketches */ diff --git a/hll/test/CMakeLists.txt b/hll/test/CMakeLists.txt index 202560fc..efdc5215 100644 --- a/hll/test/CMakeLists.txt +++ b/hll/test/CMakeLists.txt @@ -55,3 +55,10 @@ target_sources(hll_test hll_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(hll_test + PRIVATE + hll_sketch_serialize_for_java.cpp +) +endif() diff --git a/hll/test/hll_sketch_serialize_for_java.cpp b/hll/test/hll_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..e329c58f --- /dev/null +++ b/hll/test/hll_sketch_serialize_for_java.cpp @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("hll sketch generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + hll_sketch hll4(12, HLL_4); + hll_sketch hll6(12, HLL_6); + hll_sketch hll8(12, HLL_8); + for (unsigned i = 0; i < n; ++i) { + hll4.update(i); + hll6.update(i); + hll8.update(i); + } + { + std::ofstream os("hll4_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + hll4.serialize_compact(os); + } + { + std::ofstream os("hll6_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + hll6.serialize_compact(os); + } + { + std::ofstream os("hll8_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + hll8.serialize_compact(os); + } + } +} + +} /* namespace datasketches */ diff --git a/kll/test/CMakeLists.txt b/kll/test/CMakeLists.txt index 17018531..1e6f6f9f 100644 --- a/kll/test/CMakeLists.txt +++ b/kll/test/CMakeLists.txt @@ -49,3 +49,10 @@ target_sources(kll_test kll_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(kll_test + PRIVATE + kll_sketch_serialize_for_java.cpp +) +endif() diff --git a/kll/test/kll_sketch_serialize_for_java.cpp b/kll/test/kll_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..00b8913d --- /dev/null +++ b/kll/test/kll_sketch_serialize_for_java.cpp @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("kll sketch float generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + kll_sketch sketch; + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + std::ofstream os("kll_float_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +TEST_CASE("kll sketch double generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + kll_sketch sketch; + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + std::ofstream os("kll_double_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +struct compare_as_number { + bool operator()(const std::string& a, const std::string& b) const { + return std::stoi(a) < std::stoi(b); + } +}; + +TEST_CASE("kll sketch string generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + kll_sketch sketch; + for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i)); + std::ofstream os("kll_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +} /* namespace datasketches */ diff --git a/quantiles/test/CMakeLists.txt b/quantiles/test/CMakeLists.txt index 1831900d..dd999b8c 100644 --- a/quantiles/test/CMakeLists.txt +++ b/quantiles/test/CMakeLists.txt @@ -48,3 +48,10 @@ 
target_sources(quantiles_test quantiles_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(quantiles_test + PRIVATE + quantiles_sketch_serialize_for_java.cpp +) +endif() diff --git a/quantiles/test/quantiles_sketch_serialize_for_java.cpp b/quantiles/test/quantiles_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..d762d7c4 --- /dev/null +++ b/quantiles/test/quantiles_sketch_serialize_for_java.cpp @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("quantiles sketch double generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + quantiles_sketch sketch; + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + std::ofstream os("quantiles_double_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +struct compare_as_number { + bool operator()(const std::string& a, const std::string& b) const { + return std::stoi(a) < std::stoi(b); + } +}; + +TEST_CASE("quantiles sketch string generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + quantiles_sketch sketch; + for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i)); + std::ofstream os("quantiles_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +} /* namespace datasketches */ diff --git a/req/test/CMakeLists.txt b/req/test/CMakeLists.txt index 72fb39b2..8ebaadb0 100755 --- a/req/test/CMakeLists.txt +++ b/req/test/CMakeLists.txt @@ -47,3 +47,10 @@ target_sources(req_test req_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(req_test + PRIVATE + req_sketch_serialize_for_java.cpp +) +endif() diff --git a/req/test/req_sketch_serialize_for_java.cpp b/req/test/req_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..d244cb70 --- /dev/null +++ b/req/test/req_sketch_serialize_for_java.cpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("req sketch float generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + req_sketch sketch(12); + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + std::ofstream os("req_float_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +} /* namespace datasketches */ diff --git a/sampling/test/CMakeLists.txt b/sampling/test/CMakeLists.txt index f62a1442..8264f9c6 100644 --- a/sampling/test/CMakeLists.txt +++ b/sampling/test/CMakeLists.txt @@ -49,3 +49,11 @@ target_sources(sampling_test var_opt_union_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(sampling_test + PRIVATE + var_opt_sketch_serialize_for_java.cpp + var_opt_union_serialize_for_java.cpp +) +endif() diff --git a/sampling/test/var_opt_sketch_serialize_for_java.cpp b/sampling/test/var_opt_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..225b4db9 --- /dev/null +++ b/sampling/test/var_opt_sketch_serialize_for_java.cpp @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("varopt sketch long generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + var_opt_sketch sketch(32); + for (unsigned i = 1; i <= n; ++i) sketch.update(i); + std::ofstream os("varopt_sketch_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.serialize(os); + } +} + +TEST_CASE("varopt sketch string exact", "[serialize_for_java]") { + var_opt_sketch sketch(1024); + for (unsigned i = 1; i <= 200; ++i) sketch.update(std::to_string(i), 1000.0 / i); + std::ofstream os("varopt_sketch_string_exact_cpp.sk", std::ios::binary); + sketch.serialize(os); +} + +TEST_CASE("varopt sketch long sampling", "[serialize_for_java]") { + var_opt_sketch sketch(1024); + for (unsigned i = 0; i < 2000; ++i) sketch.update(i); + // negative heavy items to allow a simple predicate to filter + sketch.update(-1L, 100000.0); + sketch.update(-2L, 110000.0); + sketch.update(-3L, 120000.0); + std::ofstream os("varopt_sketch_long_sampling_cpp.sk", std::ios::binary); + sketch.serialize(os); +} + +} /* namespace datasketches */ diff --git a/sampling/test/var_opt_union_serialize_for_java.cpp b/sampling/test/var_opt_union_serialize_for_java.cpp new file mode 100644 index 00000000..e45fc1c4 --- /dev/null +++ 
b/sampling/test/var_opt_union_serialize_for_java.cpp @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("var opt union double sampling", "[serialize_for_java]") { + const unsigned k_small = 16; + const unsigned k_max = 128; + const unsigned n1 = 32; + const unsigned n2 = 64; + + // small k sketch, but sampling + var_opt_sketch sketch1(k_small); + for (unsigned i = 0; i < n1; ++i) sketch1.update(i); + // negative heavy item to allow a simple predicate to filter + sketch1.update(-1, n1 * n1); + + // another one, but different n to get a different per-item weight + var_opt_sketch sketch2(k_small); + for (unsigned i = 0; i < n2; ++i) sketch2.update(i); + + var_opt_union u(k_max); + u.update(sketch1); + u.update(sketch2); + + // must reduce k in the process + auto result = u.get_result(); + REQUIRE(result.get_k() < k_max); + REQUIRE(result.get_k() >= k_small); + REQUIRE(result.get_n() == 97); + + std::ofstream os("varopt_union_double_sampling_cpp.sk", std::ios::binary); + u.serialize(os); +} + +} /* namespace datasketches */ diff --git a/theta/test/CMakeLists.txt b/theta/test/CMakeLists.txt index 48998310..24b22fe1 100644 --- 
a/theta/test/CMakeLists.txt +++ b/theta/test/CMakeLists.txt @@ -52,3 +52,10 @@ target_sources(theta_test theta_sketch_deserialize_from_java_test.cpp ) endif() + +if (GENERATE) +target_sources(theta_test + PRIVATE + theta_sketch_serialize_for_java.cpp +) +endif() diff --git a/theta/test/theta_sketch_serialize_for_java.cpp b/theta/test/theta_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..487553f0 --- /dev/null +++ b/theta/test/theta_sketch_serialize_for_java.cpp @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("theta sketch generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + auto sketch = update_theta_sketch::builder().build(); + for (unsigned i = 0; i < n; ++i) sketch.update(i); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); + std::ofstream os("theta_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.compact().serialize(os); + } +} + +TEST_CASE("theta sketch generate compressed", "[serialize_for_java]") { + const unsigned n_arr[] = {10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + auto sketch = update_theta_sketch::builder().build(); + for (unsigned i = 0; i < n; ++i) sketch.update(i); + REQUIRE_FALSE(sketch.is_empty()); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); + std::ofstream os("theta_compressed_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.compact().serialize_compressed(os); /* must use the compressed writer: plain serialize() would only duplicate theta_n*_cpp.sk */ + } +} + +TEST_CASE("theta sketch generate non-empty no entries", "[serialize_for_java]") { + auto sketch = update_theta_sketch::builder().set_p(0.01).build(); + // here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized) + // and therefore gets rejected + sketch.update(1); + REQUIRE_FALSE(sketch.is_empty()); + REQUIRE(sketch.get_num_retained() == 0); + std::ofstream os("theta_non_empty_no_entries_cpp.sk", std::ios::binary); + sketch.compact().serialize(os); +} + +} /* namespace datasketches */ diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt index 003c02ca..4ca6a503 100644 --- a/tuple/test/CMakeLists.txt +++ b/tuple/test/CMakeLists.txt @@ -51,5 +51,14 @@ if (SERDE_COMPAT) target_sources(tuple_test PRIVATE aod_sketch_deserialize_from_java_test.cpp + tuple_sketch_deserialize_from_java_test.cpp +) +endif() + +if (GENERATE) +target_sources(tuple_test + 
PRIVATE + aod_sketch_serialize_for_java.cpp + tuple_sketch_serialize_for_java.cpp ) endif() diff --git a/tuple/test/aod_sketch_serialize_for_java.cpp b/tuple/test/aod_sketch_serialize_for_java.cpp new file mode 100644 index 00000000..4ca978a3 --- /dev/null +++ b/tuple/test/aod_sketch_serialize_for_java.cpp @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("aod sketch generate one value", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + auto sketch = update_array_of_doubles_sketch::builder().build(); + for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector(1, i)); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); + std::ofstream os("aod_1_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.compact().serialize(os); + } +} + +TEST_CASE("aod sketch generate three values", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + auto sketch = update_array_of_doubles_sketch::builder(3).build(); + for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector(3, i)); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); + std::ofstream os("aod_3_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.compact().serialize(os); + } +} + +TEST_CASE("aod sketch generate non-empty no entries", "[serialize_for_java]") { + auto sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build(); + // here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized) + // and therefore gets rejected + sketch.update(1, std::vector({1})); + REQUIRE_FALSE(sketch.is_empty()); + REQUIRE(sketch.get_num_retained() == 0); + std::ofstream os("aod_1_non_empty_no_entries_cpp.sk", std::ios::binary); + sketch.compact().serialize(os); +} + +} /* namespace datasketches */ diff --git a/tuple/test/tuple_sketch_deserialize_from_java_test.cpp b/tuple/test/tuple_sketch_deserialize_from_java_test.cpp new file mode 100644 index 00000000..408223f9 --- /dev/null +++ b/tuple/test/tuple_sketch_deserialize_from_java_test.cpp @@ -0,0 +1,47 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +// assume the binary sketches for this test have been generated by datasketches-java code +// in the subdirectory called "java" in the root directory of this project +static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; + +TEST_CASE("tuple sketch int", "[serde_compat]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + std::ifstream is; + is.exceptions(std::ios::failbit | std::ios::badbit); + is.open(testBinaryInputPath + "tuple_int_n" + std::to_string(n) + "_java.sk", std::ios::binary); + const auto sketch = compact_tuple_sketch::deserialize(is); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.is_estimation_mode() == (n > 1000)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); + for (const auto& entry: sketch) { + REQUIRE(entry.first < sketch.get_theta64()); + REQUIRE(entry.second < static_cast(n)); + } + } +} + +} /* namespace datasketches */ diff --git a/tuple/test/tuple_sketch_serialize_for_java.cpp b/tuple/test/tuple_sketch_serialize_for_java.cpp new file mode 100644 index 
00000000..6f2bf502 --- /dev/null +++ b/tuple/test/tuple_sketch_serialize_for_java.cpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +namespace datasketches { + +TEST_CASE("tuple sketch int generate", "[serialize_for_java]") { + const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (const unsigned n: n_arr) { + auto sketch = update_tuple_sketch::builder().build(); + for (unsigned i = 0; i < n; ++i) sketch.update(i, i); + REQUIRE(sketch.is_empty() == (n == 0)); + REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); + std::ofstream os("tuple_int_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); + sketch.compact().serialize(os); + } +} + +} /* namespace datasketches */