diff --git a/README.md b/README.md index 62b5172..701703d 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,10 @@ history, despite being deeply flawed). deleting them, and rates for setting and getting TruthValues and IncomingSets. -* __micro__ -- Micro-benchmarks for misc items. +* __micro__ -- Micro-benchmarks for misc items. **These benchmarks + do not work correctly! They're just plain broken! They report + incorrect results! DO NOT USE!** See the README in the subdirectory + for details about what's wrong. * __python__ -- Benchmarks for the python bindings to various AtomSpace subsystems. diff --git a/atomspace/AtomSpaceBenchmark.cc b/atomspace/AtomSpaceBenchmark.cc index e379fa5..5bbd865 100644 --- a/atomspace/AtomSpaceBenchmark.cc +++ b/atomspace/AtomSpaceBenchmark.cc @@ -478,7 +478,7 @@ void AtomSpaceBenchmark::startBenchmark(int numThreads) else { asp = new AtomSpace(); #if HAVE_CYTHON - pyev = new PythonEval(asp); + pyev = new PythonEval(); // And now ... create a Python instance of the atomspace. // Pass in the raw C++ atomspace address into cython. // Kind-of tacky, but I don't see any better way. @@ -486,7 +486,7 @@ void AtomSpaceBenchmark::startBenchmark(int numThreads) // run on a different atomspace, than the one containing // all the atoms. And that would give bad results. 
std::ostringstream dss; - dss << "from opencog.atomspace import AtomSpace, types, TruthValue, Atom" << std::endl; + dss << "from atomspace import AtomSpace, types, TruthValue, Atom" << std::endl; dss << "aspace = AtomSpace(" << asp << ")" << std::endl; pyev->eval(dss.str()); #endif @@ -597,10 +597,12 @@ Type AtomSpaceBenchmark::randomType(Type t) } while (!nameserver().isA(candidateType, t) or nameserver().isA(candidateType, BOOLEAN_LINK) or nameserver().isA(candidateType, FREE_LINK) or + nameserver().isA(candidateType, TYPE_INPUT_LINK) or + nameserver().isA(candidateType, TYPE_OUTPUT_LINK) or nameserver().isA(candidateType, NUMERIC_LINK) or nameserver().isA(candidateType, SCOPE_LINK) or - nameserver().isA(candidateType, TYPE_LINK) or nameserver().isA(candidateType, UNIQUE_LINK) or + nameserver().isA(candidateType, TYPED_VARIABLE_LINK) or candidateType == VARIABLE_LIST or candidateType == VARIABLE_SET or candidateType == DEFINE_LINK or diff --git a/atomspace/CMakeLists.txt b/atomspace/CMakeLists.txt index f59b017..767dd7a 100644 --- a/atomspace/CMakeLists.txt +++ b/atomspace/CMakeLists.txt @@ -15,8 +15,10 @@ IF (HAVE_CYTHON) ${PYTHON_INCLUDE_DIRS} ) TARGET_LINK_LIBRARIES (atomspace_bm - ${Boost_SYSTEM_LIBRARY} + PythonEval + # ${ATOMSPACE_PythonEval_LIBRARY} + # ${Boost_SYSTEM_LIBRARY} ${PYTHON_LIBRARIES} - ${ATOMSPACE_PythonEval_LIBRARY} + boost_system ) ENDIF(HAVE_CYTHON) diff --git a/atomspace/diary.txt b/atomspace/diary.txt index 3760ae6..e463f1b 100644 --- a/atomspace/diary.txt +++ b/atomspace/diary.txt @@ -1921,3 +1921,30 @@ Feb 2020 compiler: gcc (Debian 8.3.0-6) 8.3.0 Notes: Expected to maybe see some kind of change in performance. It seems to be miniscule, lost in the noise. + +4 Aug 2020 +----------- +Partial measurements. Fanny. One pull req of concern: + + Pull req #2744 - Stub out signals. No one uses them. 
+ +./atomspace_bm -A -n 4000000 + + -A -A + test pull #2744 Feb 2020 + name K-ops/sec K-ops/sec Comments + ---- --------- --------- ----------------- + noop 1.70e6 2.00e6 no change expected + getType 94K 96K " + getTV 4689 4535 see notes + setTV 1244 1163 " + pointerCast 11.0K 11.4K no change expected + getIncomingSet 3253 3332 " + getOutgoingSet 50.5K 49.8K " + addNode 711 701 see notes + addLink -A 189 183 " + addLink -m 217 206 " + removeAtom 1359 1213 " + +Notes: Removing the signal callbacks should improve performance. +... and now we know how much. We'll go for 3% to 7%, depending... diff --git a/micro/CMakeLists.txt b/micro/CMakeLists.txt index 4243baa..32f117d 100644 --- a/micro/CMakeLists.txt +++ b/micro/CMakeLists.txt @@ -3,8 +3,14 @@ LIST(APPEND LIST_MODULES benchmark.cc pointercast_bm.cc - values_bm.cc + atomspace_bm.cc + addnode_bm.cc + addlink_bm.cc evaluationlink_bm.cc + large_simple_bm.cc + large_flat_bm.cc + large_zipf_bm.cc + values_bm.cc scopelink_bm.cc variables_bm.cc ) diff --git a/micro/README.md b/micro/README.md index f8ab267..2666120 100644 --- a/micro/README.md +++ b/micro/README.md @@ -1,5 +1,38 @@ # Micro-Benchmarking +## WARNING +**THIS BENCHMARK DOES NOT MEASURE WHAT YOU THINK IT DOES** +The "Google Benchmark" assumes that all calls to some function +are all exactly the same, and that there is no hysteresis or +side-effects. This means that this benchmark is not suitable +for measuring AtomSpace insertions, because these have both +hysteresis and side-effects. Thus, the reported times +***ARE JUST PLAIN WRONG!*** + +The issue is this: Say we want to measure how much time it takes +to add 1M Atoms to the atomspace, vs how much time it takes to +add 64K Atoms. Well, the actual timing loop does NOT actually +iterate 1M times, or 64K times. It iterates some other number of +times that is not predictable. + +Thus, to measure 64K insertions, you have to count to 64K, and then +clear the atomspace... inside the timing loop! 
But we didn't want +to measure how long it takes to clear the atomspace! Worse, the loop +might run 150K times, so there are 150K == 64K+64K+22K adds, and +the short fill-up at the end skews everything. Worse still, if +you ask for 1M insertions, the loop might only run to 400K, so you +never even got to the end of the fill! + +As a result, the numbers generated by this benchmark are insane. +They might give you some kind of general impression for what's going +on, but they will be wrong by factors of 2x or 4x or 5x ... usually, +reporting performance that is much faster than what it actually is. + +If you have a lot of free time on your hands, maybe you can read the +docs and figure out how to fix this thing and make it work right. +But right now, it's fubar'ed and totally unusable. + + ## Prerequisites This requires the "Google Benchmark" micro-benchmarking tool v1.3.0 or higher. diff --git a/micro/addlink_bm.cc b/micro/addlink_bm.cc new file mode 100644 index 0000000..e89c409 --- /dev/null +++ b/micro/addlink_bm.cc @@ -0,0 +1,105 @@ +/* + * addlink_bm.cc + * + * Copyright (C) 2020 OpenCog Foundation + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License v3 as + * published by the Free Software Foundation and including the exceptions + * at http://opencog.org/wiki/Licenses + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, write to: + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include + +#include +#include +#include + +#include "benchmark.h" + +using namespace opencog; + +static void BM_AddLink(benchmark::State& state) +{ + const size_t num_to_add = state.range(0); + AtomSpace* as = new AtomSpace(); + + // 101 and 233 are prime numbers. Thus, the links will interconnect + // to form some kind of nice polytope. + const size_t number_of_atoms = state.range(0); + std::vector atoms(number_of_atoms); + for (size_t i = 0; i < number_of_atoms; ++i) + atoms[i] = createLink(LIST_LINK, + createNode(CONCEPT_NODE, "barfology" + std::to_string(i%101)), + createNode(CONCEPT_NODE, "blingometry" + std::to_string(i%233))); + + size_t i = 0; + size_t j = 0; + for (auto _ : state) + { + if (j%3 == 0) // we are creating 3 atoms per pop... + as->add_atom(atoms[i++ % number_of_atoms]); + j++; + + if (num_to_add < j) + { + as->clear(); + j = 0; + } + } + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. +BENCHMARK(BM_AddLink)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); + +static void BM_CreateAddLink(benchmark::State& state) +{ + const size_t num_to_add = state.range(0); + AtomSpace* as = new AtomSpace(); + + std::vector aname(101); + std::vector bname(233); + for (size_t i = 0; i < 101; ++i) + aname[i] = "barfology" + std::to_string(i); + for (size_t i = 0; i < 233; ++i) + bname[i] = "blingometry" + std::to_string(i); + + size_t i = 0; + size_t j = 0; + for (auto _ : state) + { + if (j%3 == 0) // we are creating 3 atoms per pop... + { + // Make a copy so that move constructore works right. + as->add_link(LIST_LINK, + as->add_node(CONCEPT_NODE, std::string({aname[i % 101]})), + as->add_node(CONCEPT_NODE, std::string({bname[i % 233]}))); + i++; + } + j++; + + if (num_to_add < j) + { + as->clear(); + j = 0; + } + } + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. 
+BENCHMARK(BM_CreateAddLink)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); diff --git a/micro/addnode_bm.cc b/micro/addnode_bm.cc new file mode 100644 index 0000000..82c08db --- /dev/null +++ b/micro/addnode_bm.cc @@ -0,0 +1,85 @@ +/* + * addnode_bm.cc + * + * Copyright (C) 2020 OpenCog Foundation + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License v3 as + * published by the Free Software Foundation and including the exceptions + * at http://opencog.org/wiki/Licenses + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, write to: + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include +#include + +#include "benchmark.h" + +using namespace opencog; + +static void BM_AddNode(benchmark::State& state) +{ + AtomSpace* as = new AtomSpace(); + + size_t seed = 0; + const size_t number_of_nodes = state.range(0); + std::vector nodes(number_of_nodes); + for (size_t i = 0; i < number_of_nodes; ++i) + nodes[i] = createNode(CONCEPT_NODE, get_unique_name("barfology", seed)); + + size_t i = 0; + for (auto _ : state) + { + as->add_atom(nodes[i++]); + if (number_of_nodes <= i) + { + as->clear(); + i = 0; + } + } + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. 
+BENCHMARK(BM_AddNode)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); + +static void BM_CreateAddNode(benchmark::State& state) +{ + AtomSpace* as = new AtomSpace(); + + size_t seed = 0; + const size_t number_of_names = state.range(0); + std::vector names(number_of_names); + for (size_t i = 0; i < number_of_names; ++i) + names[i] = get_unique_name("barfology", seed); + + size_t i = 0; + for (auto _ : state) + { + // Make a copy so that move constructore works right. + as->add_node(CONCEPT_NODE, std::string({names[i++]})); + if (number_of_names <= i) + { + as->clear(); + i = 0; + } + } + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. +BENCHMARK(BM_CreateAddNode)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); diff --git a/micro/atomspace_bm.cc b/micro/atomspace_bm.cc new file mode 100644 index 0000000..4682885 --- /dev/null +++ b/micro/atomspace_bm.cc @@ -0,0 +1,55 @@ +/* + * atomspace_bm.cc + * + * Copyright (C) 2020 OpenCog Foundation + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License v3 as + * published by the Free Software Foundation and including the exceptions + * at http://opencog.org/wiki/Licenses + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, write to: + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include + +#include +#include + +#include "benchmark.h" + +using namespace opencog; + +static void BM_AtomSpace_Basic(benchmark::State& state) +{ + AtomSpace* as; + + for (auto _ : state) + { + as = new AtomSpace(); + delete as; + } +} +BENCHMARK(BM_AtomSpace_Basic); + +static void BM_AtomSpace_OneNode(benchmark::State& state) +{ + AtomSpace* as; + + for (auto _ : state) + { + as = new AtomSpace(); + as->add_node(CONCEPT_NODE, "foobariffic"); + delete as; + } +} +BENCHMARK(BM_AtomSpace_OneNode); diff --git a/micro/diary.txt b/micro/diary.txt index f0ed73a..8544609 100644 --- a/micro/diary.txt +++ b/micro/diary.txt @@ -1,4 +1,8 @@ +IMPORTANT Update 5 August 2020: +This benchmark is fucked up. Read the README. All numbers that it +reports are JUST PLAIN WRONG! + Linas micro-benchmark diary --------------------------- General critique of the micro-benchmarks: they're not realistic, they @@ -175,3 +179,20 @@ BM_VariablesExt_NewVarNoRestrict 214 ns 214 ns 3262795 BM_VariablesExt_SameVarRestrict 385 ns 385 ns 1839271 BM_ForwardChainer_Basic 1108895 ns 1108773 ns 627 + + +5 Aug 2020 +---------- +All new, all improved. Hacked on the Evaluation test to do it +correctly. Ported LargeFlatUTest and LargeZipfUTest to get a +sanity check. gcc and ldconfig unchanged. Atomspace mostly +unchanged. + +$ gcc --version +gcc (Debian 8.3.0-6) 8.3.0 + +$ /sbin/ldconfig --version +ldconfig (Debian GLIBC 2.28-10) 2.28 + +This benchmark is fucked up. Read the README. All numbers that it +reports are JUST PLAIN WRONG! diff --git a/micro/evaluationlink_bm.cc b/micro/evaluationlink_bm.cc index b4c3826..e70934e 100644 --- a/micro/evaluationlink_bm.cc +++ b/micro/evaluationlink_bm.cc @@ -34,72 +34,111 @@ using namespace opencog; -// XXX FIXME, this is not a realistic EvaluationLink. -// This is more like an ordinary ListLink... 
-static Handle create_evaluation_link(AtomSpace& atomspace, size_t& seed) +static Handle create_evaluation_link(size_t i) { - Handle X = atomspace.add_node(VARIABLE_NODE, get_unique_name("$X", seed)); - Handle P = atomspace.add_node(PREDICATE_NODE, get_unique_name("P", seed)); - return createLink(EVALUATION_LINK, P, X); -} - -static Handle create_evaluation_link(AtomSpace& atomspace) -{ - size_t seed = 0; - return create_evaluation_link(atomspace, seed); + // The numbers 101 and 233 are prime numbers, so this whole + // thing will interconnect to form some nice polytope. + return createLink(EVALUATION_LINK, + createNode(PREDICATE_NODE, "kind-of " + std::to_string(i%11)), + createLink(LIST_LINK, + createNode(CONCEPT_NODE, "barfology" + std::to_string(i%101)), + createNode(CONCEPT_NODE, "blingometry" + std::to_string(i%233)))); } static void BM_CreateEvaluationLink(benchmark::State& state) { - AtomSpace atomspace; - Handle X = atomspace.add_node(VARIABLE_NODE, "$X"); - Handle P = atomspace.add_node(PREDICATE_NODE, "P"); - + size_t j = 0; for (auto _ : state) { - createLink(EVALUATION_LINK, P, X); + if (j % 5 == 0) // we are creating 5 atoms per pop. + create_evaluation_link(0); + j++; } } BENCHMARK(BM_CreateEvaluationLink); static void BM_AddSameEvaluationLink(benchmark::State& state) { - AtomSpace atomspace; - Handle evaluationLink = create_evaluation_link(atomspace); - - logger().fine("atomspace size before adding: %d", atomspace.get_size()); + AtomSpace* as = new AtomSpace; + size_t j = 0; + logger().fine("atomspace size before adding: %d", as->get_size()); for (auto _ : state) { - atomspace.add_atom(evaluationLink); + if (j % 5 == 0) // we are creating 5 atoms per pop. + { + // Create multiple copies of the same atom. + // The AtomSpace takes over memory management of the + // added C++ Atom, so we have to keep feeding it unique Atoms. 
+ as->add_atom(create_evaluation_link(0)); + } + j++; } - logger().fine("atomspace size after adding: %d", atomspace.get_size()); - + logger().fine("atomspace size after adding: %d", as->get_size()); + delete as; } BENCHMARK(BM_AddSameEvaluationLink); -static void BM_AddEvaluationLink(benchmark::State& state) +static void BM_AddEvalLink(benchmark::State& state) { - AtomSpace atomspace; + AtomSpace* as = new AtomSpace; const size_t number_of_links = state.range(0); std::vector links(number_of_links); - size_t seed = 0; + for (size_t i = 0; i < number_of_links; ++i) + links[i] = create_evaluation_link(i); + + logger().fine("atomspace size before adding: %d", as->get_size()); + + size_t i = 0; + size_t j = 0; + for (auto _ : state) { - links[i] = create_evaluation_link(atomspace, seed); + if (j < as->get_size()) // we are creating 5 atoms per pop. + as->add_atom(links[i++ % number_of_links]); + j++; + if (number_of_links < j) + { + as->clear(); + j = 0; + } } - logger().fine("atomspace size before adding: %d", atomspace.get_size()); + logger().fine("atomspace size after adding: %d", as->get_size()); + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. +BENCHMARK(BM_AddEvalLink)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); + +static void BM_CreateAddEvalLink(benchmark::State& state) +{ + const size_t num_to_add = state.range(0); + AtomSpace* as = new AtomSpace; + + logger().fine("atomspace size before adding: %d", as->get_size()); size_t i = 0; + size_t j = 0; for (auto _ : state) { - atomspace.add_atom(links[i++ % number_of_links]); + if (j < as->get_size()) // we are creating 5 atoms per pop. 
+ as->add_atom(create_evaluation_link(i++)); + j++; + if (num_to_add < j) + { + as->clear(); + j = 0; + } } - logger().fine("atomspace size after adding: %d", atomspace.get_size()); + logger().fine("atomspace size after adding: %d", as->get_size()); + delete as; } -BENCHMARK(BM_AddEvaluationLink)->Arg(2<<13)->Arg(2<<14)->Arg(2<<15); +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. +BENCHMARK(BM_CreateAddEvalLink)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); diff --git a/micro/large_flat_bm.cc b/micro/large_flat_bm.cc new file mode 100644 index 0000000..4e75e7c --- /dev/null +++ b/micro/large_flat_bm.cc @@ -0,0 +1,188 @@ +/* + * large_flat_bm.cc + * Copy of + * tests/persist/sql/multi-driver/LargeFlatUTest.cxxtest + * + * Copyright (C) 2020 OpenCog Foundation + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License v3 as + * published by the Free Software Foundation and including the exceptions + * at http://opencog.org/wiki/Licenses + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, write to: + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include +#include +#include +#include + +#include "benchmark.h" + +using namespace opencog; + +// ============================================================ + +// This is a verbatim copy of the business end of +// tests/persist/sql/multi-driver/LargeFlatUTest.cxxtest +// This creates a "flat" Atomspace, where all links have a small +// incoming set. Compare to Zipf, which distributes in a Zipfian +// way. 
+class LargeFlatUTest +{ + std::vector n1; + std::vector n2; + std::vector n3; + std::vector n4; + std::vector l; + std::vector l2; + std::vector l3; + std::vector h1; + std::vector h2; + std::vector h3; + std::vector h4; + std::vector hl; + std::vector hl2; + std::vector hl3; + +public: + LargeFlatUTest(size_t nchunks); + void add_to_space(size_t idx, AtomSpace *as, std::string id); + size_t filler_up(AtomSpace *space, size_t ichk); +}; + +LargeFlatUTest::LargeFlatUTest(size_t nchunks) +{ + for (size_t idx = 0; idx < nchunks; idx++) + { + n1.push_back(NodePtr()); + n2.push_back(NodePtr()); + n3.push_back(NodePtr()); + n4.push_back(NodePtr()); + l.push_back(LinkPtr()); + l2.push_back(LinkPtr()); + l3.push_back(LinkPtr()); + h1.push_back(Handle::UNDEFINED); + h2.push_back(Handle::UNDEFINED); + h3.push_back(Handle::UNDEFINED); + h4.push_back(Handle::UNDEFINED); + hl.push_back(Handle::UNDEFINED); + hl2.push_back(Handle::UNDEFINED); + hl3.push_back(Handle::UNDEFINED); + } +} + +void LargeFlatUTest::add_to_space(size_t idx, AtomSpace *as, std::string id) +{ + // Create an atom ... + TruthValuePtr stv(SimpleTruthValue::createTV(0.11, 100+idx)); + h1[idx] = as->add_node(SCHEMA_NODE, id + "fromNode"); + h1[idx]->setTruthValue(stv); + n1[idx] = NodeCast(h1[idx]); + + TruthValuePtr stv2(SimpleTruthValue::createTV(0.22, 200+idx)); + h2[idx] = as->add_node(SCHEMA_NODE, id + "toNode"); + h2[idx]->setTruthValue(stv2); + n2[idx] = NodeCast(h2[idx]); + + TruthValuePtr stv3(SimpleTruthValue::createTV(0.33, 300+idx)); + h3[idx] = as->add_node(SCHEMA_NODE, id + "third wheel"); + h3[idx]->setTruthValue(stv3); + n3[idx] = NodeCast(h3[idx]); + + // The NumberNode will go through the AtomTable clone factory + // and should thus elicit any errors in clone uuid handling. 
+ char buf[40]; sprintf(buf, "%f", idx+0.14159265358979); + h4[idx] = as->add_node(NUMBER_NODE, buf); + TruthValuePtr stv4(SimpleTruthValue::createTV(0.44, 400+idx)); + h4[idx]->setTruthValue(stv4); + n4[idx] = NodeCast(h4[idx]); + + HandleSeq hvec; + hvec.push_back(h1[idx]); + hvec.push_back(h2[idx]); + hvec.push_back(h3[idx]); + hvec.push_back(h4[idx]); + + // Note that SetLink is an unordered link. + hl[idx] = as->add_link(SET_LINK, std::move(hvec)); + l[idx] = LinkCast(hl[idx]); + + hl2[idx] = as->add_link(LIST_LINK, hl[idx], h2[idx]); + l2[idx] = LinkCast(hl2[idx]); + + hl3[idx] = as->add_link(EVALUATION_LINK, h1[idx], hl2[idx], h3[idx]); + l3[idx] = LinkCast(hl3[idx]); +} + +// ============================================================ + +size_t LargeFlatUTest::filler_up(AtomSpace* _as, size_t idx) +{ + std::string lbl = std::to_string(idx); + add_to_space(idx++, _as, "AA-aa-wow " + lbl); + add_to_space(idx++, _as, "BB-bb-wow " + lbl); + add_to_space(idx++, _as, "CC-cc-wow " + lbl); + add_to_space(idx++, _as, "DD-dd-wow " + lbl); + add_to_space(idx++, _as, "EE-ee-wow " + lbl); + + /* Make sure UTF-8 works fine. */ + add_to_space(idx++, _as, "Попытка выбраться вызвала слабый стон " + lbl); + add_to_space(idx++, _as, "はにがうりだそうであってるのかはち " + lbl); + add_to_space(idx++, _as, "係拉丁字母" + lbl); + + return idx; +} + +// ============================================================ + +static void BM_LargeFlat(benchmark::State& state) +{ + size_t num_adds = state.range(0); + + // The LargeFlatUTest create 56 atoms for each call to filler_up() + // That's (4 nodes + 3 links) x 8 for each call. 
+ LargeFlatUTest* lfut = new LargeFlatUTest(num_adds/7+10); + + AtomSpace* as = new AtomSpace(); + size_t i=0; + size_t j=0; + for (auto _ : state) + { + if (j < num_adds) + { + if (j < as->get_size()) + { + i = lfut->filler_up(as, i); + i %= num_adds/7; + } + j++; + } + else + { + delete lfut; + delete as; + as = new AtomSpace(); + lfut = new LargeFlatUTest(num_adds/7+10); + i = 0; + j = 0; + } + } + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. +BENCHMARK(BM_LargeFlat)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); diff --git a/micro/large_simple_bm.cc b/micro/large_simple_bm.cc new file mode 100644 index 0000000..1d15ea0 --- /dev/null +++ b/micro/large_simple_bm.cc @@ -0,0 +1,150 @@ +/* + * large_sflat_bm.cc + * Copy of + * tests/persist/sql/multi-driver/LargeSimpleFlatUTest.cxxtest + * + * Copyright (C) 2020 OpenCog Foundation + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License v3 as + * published by the Free Software Foundation and including the exceptions + * at http://opencog.org/wiki/Licenses + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, write to: + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include + +#include +#include +#include +#include + +#include "benchmark.h" + +using namespace opencog; + +// ============================================================ + +// This was a verbatim copy of the business end of +// tests/persist/sql/multi-driver/LargeSimpleFlatUTest.cxxtest +// except it's been simplfied to remove the cruft. +// This creates a "flat" Atomspace, where all links have a small +// incoming set. Compare to Zipf, which distributes in a Zipfian +// way. +class LargeSimpleFlatUTest +{ + AtomSpace* _as; +public: + LargeSimpleFlatUTest(AtomSpace*); + void add_to_space(size_t idx, const std::string& id); + void filler_up(size_t idx); +}; + +LargeSimpleFlatUTest::LargeSimpleFlatUTest(AtomSpace* as) +{ + _as = as; +} + +void LargeSimpleFlatUTest::add_to_space(size_t idx, const std::string& id) +{ + // Create an atom ... + TruthValuePtr stv(SimpleTruthValue::createTV(0.11, 100+idx)); + Handle h1 = _as->add_node(SCHEMA_NODE, id + "fromNode"); + h1->setTruthValue(stv); + + TruthValuePtr stv2(SimpleTruthValue::createTV(0.22, 200+idx)); + Handle h2 = _as->add_node(SCHEMA_NODE, id + "toNode"); + h2->setTruthValue(stv2); + + TruthValuePtr stv3(SimpleTruthValue::createTV(0.33, 300+idx)); + Handle h3 = _as->add_node(SCHEMA_NODE, id + "third wheel"); + h3->setTruthValue(stv3); + + // The NumberNode will go through the AtomTable clone factory + // and should thus elicit any errors in clone uuid handling. + char buf[40]; sprintf(buf, "%f", idx+0.14159265358979); + Handle h4 = _as->add_node(NUMBER_NODE, buf); + TruthValuePtr stv4(SimpleTruthValue::createTV(0.44, 400+idx)); + h4->setTruthValue(stv4); + + HandleSeq hvec; + hvec.push_back(h1); + hvec.push_back(h2); + hvec.push_back(h3); + hvec.push_back(h4); + + // Note that SetLink is an unordered link. 
+ Handle hl = _as->add_link(SET_LINK, std::move(hvec)); + + Handle hl2 = _as->add_link(LIST_LINK, hl, h2); + _as->add_link(EVALUATION_LINK, h1, hl2, h3); +} + +// ============================================================ + +void LargeSimpleFlatUTest::filler_up(size_t idx) +{ + std::string lbl = std::to_string(idx); + add_to_space(idx, "AA-aa-wow " + lbl); + add_to_space(idx, "BB-bb-wow " + lbl); + add_to_space(idx, "CC-cc-wow " + lbl); + add_to_space(idx, "DD-dd-wow " + lbl); + add_to_space(idx, "EE-ee-wow " + lbl); + + /* Make sure UTF-8 works fine. */ + add_to_space(idx, "Попытка выбраться вызвала слабый стон " + lbl); + add_to_space(idx, "はにがうりだそうであってるのかはち " + lbl); + add_to_space(idx, "係拉丁字母" + lbl); +} + +// ============================================================ + +static void BM_LargeSimpleFlat(benchmark::State& state) +{ + const size_t num_adds = state.range(0); + + AtomSpace* as = new AtomSpace(); + + // The LargeSimpleFlatUTest create 56 atoms for each call + // to filler_up() -- That's (4 nodes + 3 links) x 8 for each call. + LargeSimpleFlatUTest* lfut = new LargeSimpleFlatUTest(as); + + size_t i=0; + size_t j=0; + for (auto _ : state) + { + if (j < num_adds) + { + if (j < as->get_size()) + { + lfut->filler_up(i); + i++; + } + j++; + } + else + { + delete lfut; + delete as; + as = new AtomSpace(); + lfut = new LargeSimpleFlatUTest(as); + i = 0; + j = 0; + } + } + delete as; +} + +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. 
+BENCHMARK(BM_LargeSimpleFlat)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17); diff --git a/micro/large_zipf_bm.cc b/micro/large_zipf_bm.cc new file mode 100644 index 0000000..8f74638 --- /dev/null +++ b/micro/large_zipf_bm.cc @@ -0,0 +1,176 @@ +/* + * large_zipf_bm.cc + * Copy of + * tests/persist/sql/multi-driver/LargeZipfUTest.cxxtest + * + * Copyright (C) 2020 OpenCog Foundation + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License v3 as + * published by the Free Software Foundation and including the exceptions + * at http://opencog.org/wiki/Licenses + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program; if not, write to: + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "benchmark.h" + +using namespace opencog; + +// ============================================================ + +// This is a redesigned copy of the business end of +// tests/persist/sql/multi-driver/LargeZipfUTest.cxxtest +// This creates an Atomspace with a Zipfian distribution of incoming +// sets. + +// ============================================================ + +class Zipf +{ + // Emulate a Zipfian distribution. + // Half of words linked once. rpt = 1, wmax = NWORDS + // quarter of words linked twice. rpt = 2, wmax = NWORDS/2 + // eighth of words linked 4 times. rpt = 4, wmax = NWORDS/4 + // 1/16 of words linked 8 times. rpt = 8, wmax = NWORDS/8 + // + // I'm no longer sure the below works correctly, but whatever... + // good enough, maybe? 
+ std::vector hword; + size_t nwords; + size_t w1; + size_t w2; + size_t wmax; + size_t npairs; + + // Emulate a word, with a spelling that is not long, not short ... + std::string wrdbase; + + AtomSpace* _as; + +public: + Zipf(AtomSpace* as) + { + _as = as; + + w1 = 0; + w2 = 0; + npairs = 0; + + // Emulate a word, with a spelling that is not long, not short ... + wrdbase = "Word-ishy "; + + Handle hw = as->add_node(CONCEPT_NODE, wrdbase + "foo"); + TruthValuePtr tv(CountTruthValue::createTV(1, 0, 0)); + hw->setTruthValue(tv); + hword.emplace_back(hw); + + nwords = 1; + wmax = 1; + } + size_t add_some(); + + void report(); +}; + +// ============================================================ + +size_t Zipf::add_some() +{ + Handle hpair = _as->add_link(LIST_LINK, hword[w1], hword[w2]); + TruthValuePtr tv = hpair->getTruthValue(); + size_t cnt = 0; + if (COUNT_TRUTH_VALUE == tv->get_type()) + cnt = CountTruthValueCast(tv)->get_count(); + hpair->setTruthValue(CountTruthValue::createTV(1, 0, ++cnt)); + + w2++; + npairs++; + if (wmax <= w2) + { + w2=0; + w1++; + if (nwords <= w1) + { + Handle hw = _as->add_node(CONCEPT_NODE, + wrdbase + std::to_string(nwords)); + TruthValuePtr tv(CountTruthValue::createTV(1, 0, 0)); + hw->setTruthValue(tv); + hword.emplace_back(hw); + nwords++; + w1 = 0; + } + + wmax = nwords / (w1+1); + } + return nwords + npairs; +} + +void Zipf::report() +{ + double rt = sqrt(npairs); + printf("In the end, nwords=%lu npairs=%lu sqrt*log=%f\n", + nwords, npairs, rt*log(rt)); + printf("AtomSpace holds nwords=%lu npairs=%lu\n", + _as->get_num_atoms_of_type(CONCEPT_NODE), + _as->get_num_atoms_of_type(LIST_LINK)); + printf("\n"); +} + +// ============================================================ + +static void BM_LargeZipf(benchmark::State& state) +{ + size_t num_adds = state.range(0); + // printf("Running benchmark to add: %lu\n", num_adds); + + AtomSpace* as = new AtomSpace(); + Zipf* zp = new Zipf(as); + + size_t nadds = 1; + size_t j=0; + for 
(auto _ : state) + { + if (j < num_adds) + { + if (nadds < j) + { + nadds = zp->add_some(); + } + j++; + } + else + { + // zp->report(); + delete zp; + delete as; + as = new AtomSpace(); + zp = new Zipf(as); + j = 0; + nadds = 1; + } + } + + delete as; +} +// Cannot go higher than 17 because the benchmark doesn't +// iterate enough times. +BENCHMARK(BM_LargeZipf)->Arg(2<<9)->Arg(2<<16)->Arg(2<<17);