From bab9f23fa44031423630cb41180ebb1c7c26073f Mon Sep 17 00:00:00 2001 From: ampli Date: Mon, 7 Sep 2020 13:12:17 +0300 Subject: [PATCH 01/11] connectors.h: Move LENGTH_LIMIT --- link-grammar/connectors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index b46aca173..c7190f860 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -100,6 +100,9 @@ struct condesc_struct }; typedef struct condesc_struct condesc_t; +/* Length-limits for how far connectors can reach out. */ +#define UNLIMITED_LEN 255 + typedef struct length_limit_def { const char *defword; @@ -225,9 +228,6 @@ static inline Connector *connector_deepest(const Connector *c) return (Connector *)c; /* Note: Constness removed. */ } -/* Length-limits for how far connectors can reach out. */ -#define UNLIMITED_LEN 255 - /** * Returns TRUE if s and t match according to the connector matching * rules. The connector strings must be properly formed, starting with From 4d2c3f79060830c04edcd3c8e3a456827ccaa3c2 Mon Sep 17 00:00:00 2001 From: ampli Date: Mon, 7 Sep 2020 13:07:44 +0300 Subject: [PATCH 02/11] connectors.h: Comment cleanup --- link-grammar/connectors.h | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index c7190f860..dca8d19fe 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -14,10 +14,10 @@ #ifndef _LINK_GRAMMAR_CONNECTORS_H_ #define _LINK_GRAMMAR_CONNECTORS_H_ -#include // for islower() +#include // islower() #include #include -#include // for uint8_t +#include // uint8_t ... #include "api-types.h" #include "error.h" @@ -151,14 +151,14 @@ struct Connector_struct const gword_set *originating_gword; /* Used while and after parsing */ struct { - int32_t refcount;/* Memory-sharing reference count - for pruning. 
*/ - uint16_t exp_pos; /* The position in the originating expression, - currently used only for debugging dict macros. */ - bool shallow; /* TRUE if this is a shallow connector. - * A connectors is shallow if it is the first in - * its list on its disjunct. (It is deep if it is - * not the first in its list; it is deepest if it - * is the last on its list.) */ + int32_t refcount; /* Memory-sharing reference count - for pruning. */ + uint16_t exp_pos; /* The position in the originating expression, + * currently used only for debugging dict macros. */ + bool shallow; /* TRUE if this is a shallow connector. + * A connectors is shallow if it is the first in + * its list on its disjunct. (It is deep if it is + * not the first in its list; it is deepest if it + * is the last on its list.) */ }; }; }; @@ -383,17 +383,20 @@ static inline size_t pair_hash(int lw, int rw, /** * Get the word number of the given tracon. - * c is the leading tracon connector. The word number is extracted from - * the nearest_word of the deepest connector. + * It is extracted from the nearest_word of the deepest connector. + * @param c The leading tracon connector. + * @param dir Direction - 0: left; 1: right. + * @return Sentence word number. + * * This function depends on setup_connectors() (which initializes - * nearest_word). It should not be called after power_prune() (which - * changes nearest_word). + * nearest_word). It should not be called during or after power_prune() + * (which changes nearest_word). * * Note: An alternative for getting the word number of a tracon is to keep * it in the tracon list table or in a separate array. Both ways add * noticeable overhead, maybe due to the added CPU cache footprint. - * However, if the word number will be needed after power_prune() there - * will be a need to keep it in an alternative way. + * However, if the need arises for the word number of a tracon during/after + * power_prune(), there will be a need to keep it in an alternative way. 
*/ static inline int get_tracon_word_number(Connector *c, int dir) { From 476f83c9c70d9eca66a76c068e2cd3ec56443cc0 Mon Sep 17 00:00:00 2001 From: ampli Date: Thu, 18 Apr 2024 22:21:50 +0300 Subject: [PATCH 03/11] .gitignore: Add default vim session file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6ddc45c12..2a6ec1f41 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,4 @@ data/th/old_words/** # a.wasm is created by "emcc" (in "emconfigure ./configure"). # For now just ignore it. a.wasm +Session.vim From f46fe2227efd912d86775756d4e7ef304646ac5b Mon Sep 17 00:00:00 2001 From: ampli Date: Thu, 18 Apr 2024 23:53:45 +0300 Subject: [PATCH 04/11] tracon-set.c: Fix missing print_connector_list_str on DEBUG --- link-grammar/tracon-set.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/link-grammar/tracon-set.c b/link-grammar/tracon-set.c index 0847c5722..58575d760 100644 --- a/link-grammar/tracon-set.c +++ b/link-grammar/tracon-set.c @@ -15,11 +15,12 @@ #include "const-prime.h" #include "connectors.h" +#include "tracon-set.h" +#include "utilities.h" + #ifdef TRACON_SET_DEBUG #include "disjunct-utils.h" // print_connector_list_str #endif -#include "tracon-set.h" -#include "utilities.h" /** * This is an adaptation of the string_set module for detecting unique From 4adf270714e148049e001bf3f5da758601613c41 Mon Sep 17 00:00:00 2001 From: ampli Date: Fri, 19 Apr 2024 00:04:26 +0300 Subject: [PATCH 05/11] count_clauses(): Mark as unused This prevents an omission warning on DEBUG, and also makes it available in gdb.
--- link-grammar/prepare/build-disjuncts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index 84d1c37b3..15b301641 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -129,7 +129,7 @@ static void debug_last(Clause *c, Clause **c_last, const char *type) * Return the number of clauses that would be generated by expanding * the expression. */ -static unsigned long count_clauses(Exp *e) +GNUC_UNUSED static unsigned long count_clauses(Exp *e) { if (e->type == AND_type) { From 437038a2ee8efc30f991f424bc2ef1955c4f1471 Mon Sep 17 00:00:00 2001 From: ampli Date: Fri, 19 Apr 2024 01:45:14 +0300 Subject: [PATCH 06/11] UNINITIALIZED_MAX_DISJUNCTS: Change to INT_MAX The code in build_disjuncts_for_exp(), due to "int rand_r()", cannot handle more than INT_MAX disjuncts anyway, so use int here for consistency. In any case, the number of disjuncts per sentence is not expected to reach anything near this number (and here we handle the number of disjuncts per X_node). --- link-grammar/dict-common/dict-common.h | 4 +++- link-grammar/prepare/build-disjuncts.c | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h index 4f4bba989..543a59846 100644 --- a/link-grammar/dict-common/dict-common.h +++ b/link-grammar/dict-common/dict-common.h @@ -14,6 +14,8 @@ #ifndef _LG_DICT_COMMON_H_ #define _LG_DICT_COMMON_H_ +#include <limits.h> // INT_MAX + #include "api-types.h" // pp_knowledge #include "connectors.h" // ConTable #include "dict-defines.h" @@ -34,7 +36,7 @@ * is used. */ static const float UNINITIALIZED_MAX_DISJUNCT_COST = -10000.0f; static const float DEFAULT_MAX_DISJUNCT_COST = 2.7f; -static const float UNINITIALIZED_MAX_DISJUNCTS = -1; +static const int UNINITIALIZED_MAX_DISJUNCTS = INT_MAX; /* We need some of these as literal strings.
*/ #define LG_DICTIONARY_VERSION_NUMBER "dictionary-version-number" diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index 15b301641..e875c19e8 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -358,9 +358,10 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word, * with uniform weighting; no attempt to look at the cost * is made. A fancier algo might selectively choose those * with lower cost. + * We don't care for now that this doesn't work if discnt > INT_MAX. */ - unsigned int maxdj = opts->max_disjuncts; - unsigned int discnt = count_disjuncts(dis); + int maxdj = opts->max_disjuncts; + int discnt = count_disjuncts(dis); if (discnt < maxdj) return dis; /* If we are here, we need to trim down the list */ @@ -369,7 +370,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word, Disjunct *ktail = dis; for (Disjunct *d = dis->next; d != NULL; d=d->next) { - unsigned int pick = rand_r(&rst) % discnt; + int pick = rand_r(&rst) % discnt; if (pick < maxdj) { ktail->next = d; From b674eb448a0af2614057e261a12bfa82b91860cc Mon Sep 17 00:00:00 2001 From: ampli Date: Tue, 23 Apr 2024 19:32:02 +0300 Subject: [PATCH 07/11] complete_command(): Init prev & addlen to avoid FP maybe-uninitialized clang 17.0.6. 
--- link-parser/lg_readline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/link-parser/lg_readline.c b/link-parser/lg_readline.c index c7308a092..d2e872ba5 100644 --- a/link-parser/lg_readline.c +++ b/link-parser/lg_readline.c @@ -64,8 +64,8 @@ static char *complete_command(const wchar_t *input, size_t len, bool is_help) const Switch **start = NULL; const Switch **end; const Switch **match; - const char *prev; - size_t addlen; + const char *prev = NULL; + size_t addlen = 0; bool is_assignment = false; /* marking for the help facility */ if ((1 < len) && L'=' == input[len-1] && !is_help) From c0e14254668bec63fcac2ae530d91986ab58f526 Mon Sep 17 00:00:00 2001 From: ampli Date: Wed, 24 Apr 2024 00:40:58 +0300 Subject: [PATCH 08/11] swig: Remove -py3 (deprecated) --- bindings/python/Makefile.am | 2 +- debug/README.md | 2 +- msvc/Python3.vcxproj | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bindings/python/Makefile.am b/bindings/python/Makefile.am index 11e2f5a42..878c47b3b 100644 --- a/bindings/python/Makefile.am +++ b/bindings/python/Makefile.am @@ -51,7 +51,7 @@ if HAVE_SWIG # Swig builds these .... $(built_c_sources) $(built_py_sources): $(SWIG_INCLUDES) $(built_c_sources) $(built_py_sources): $(SWIG_SOURCES) - $(AM_V_GEN) $(SWIG) -python -py3 -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $< + $(AM_V_GEN) $(SWIG) -python -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $< else $(built_c_sources) $(built_py_sources): touch $(built_c_sources) $(built_py_sources) diff --git a/debug/README.md b/debug/README.md index 0792f58e7..1710e5f8a 100644 --- a/debug/README.md +++ b/debug/README.md @@ -122,7 +122,7 @@ messages.) 
Or, in order to display the word array: -`link-parser -v=7 -debug=tokenize.c,print_sentence_word_alternatives` +`link-parser -v=8 -debug=build_sentence_expressions,print_sentence_word_alternatives` 5) Debug post-processing: diff --git a/msvc/Python3.vcxproj b/msvc/Python3.vcxproj index 1e1b1e8d6..f5d78ba21 100644 --- a/msvc/Python3.vcxproj +++ b/msvc/Python3.vcxproj @@ -24,22 +24,22 @@ <nul set/p x="Invoking "& where.exe swig.exe echo on cd $(IntDir) -swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" +swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" %40echo off <nul set/p x="Invoking "& where.exe swig.exe echo on cd $(IntDir) -swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" +swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" %40echo off <nul set/p x="Invoking "& where.exe swig.exe echo on cd $(IntDir) -swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" +swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" %40echo off <nul set/p x="Invoking "& where.exe swig.exe echo on cd $(IntDir) -swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" +swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)" %40echo off Generating Python3 wrapper ^& interface $(IntDir)\lg_python_wrap.cpp;$(OutDir)\clinkgrammar.py @@ -266,4 +266,4 @@ swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\lin - \ No newline at end of file + From 4057d06b0423bbbda9cd08639f65d92f6daf4929 Mon Sep 17 00:00:00 2001 
From: ampli Date: Wed, 24 Apr 2024 01:17:52 +0300 Subject: [PATCH 09/11] Fix comment typos/rot --- bindings/python-examples/tests.py | 4 ++-- link-grammar/api-structures.h | 6 +++--- link-grammar/connectors.h | 4 ++-- link-grammar/dict-common/regex-morph.c | 2 +- link-grammar/dict-file/dictionary.c | 2 +- link-grammar/disjunct-utils.c | 2 +- link-grammar/linkage/linkage.c | 3 +-- link-grammar/memory-pool.c | 2 +- link-grammar/parse/count.c | 4 ++-- link-grammar/prepare/build-disjuncts.c | 2 +- link-grammar/tokenize/word-structures.h | 2 +- link-parser/link-generator.c | 2 +- 12 files changed, 17 insertions(+), 18 deletions(-) diff --git a/bindings/python-examples/tests.py b/bindings/python-examples/tests.py index 0dc9ece04..cc9ab441d 100755 --- a/bindings/python-examples/tests.py +++ b/bindings/python-examples/tests.py @@ -1090,12 +1090,12 @@ def test_no_op_resolving(self): def test_resolving(self): """ - Test expression resolving using the default headline:4 setting from + Test expression resolving using the default headline:99 setting from data/en/4.0.dialect. """ dictnode = clg.dictionary_lookup_list(self.d._obj, sm('book.n')) exp_old = dictnode[0].exp - exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:4 + exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:99 # Find the 2 locations with a difference when comparing # exp_old to exp_new and validate them. diff --git a/link-grammar/api-structures.h b/link-grammar/api-structures.h index c308401a6..3924a947b 100644 --- a/link-grammar/api-structures.h +++ b/link-grammar/api-structures.h @@ -55,10 +55,10 @@ * included tracon sharing) should always be done. And now the overhead * is negligible. * - * Note: setting this to non-zero values disables some of the more + * Note: setting this to 254(MAX_SENTENCE) disables some of the more * subtle tracon encoding code, and thus can be used to create a - * baseline parse, skipping that code. 
This can be setin with the - * test_enabled("min-len-encoding") flag (see api.c) + * baseline parse, skipping that code. This can be done using + * -test="min-len-encoding:254" (see sentence.c). */ #define SENTENCE_MIN_LENGTH_TRAILING_HASH 0 diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index dca8d19fe..b6c4bc5bf 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -148,14 +148,14 @@ struct Connector_struct Connector *next; union { - const gword_set *originating_gword; /* Used while and after parsing */ + const gword_set *originating_gword; /* Used while and after parsing. */ struct { int32_t refcount; /* Memory-sharing reference count - for pruning. */ uint16_t exp_pos; /* The position in the originating expression, * currently used only for debugging dict macros. */ bool shallow; /* TRUE if this is a shallow connector. - * A connectors is shallow if it is the first in + * A connector is shallow if it is the first in * its list on its disjunct. (It is deep if it is * not the first in its list; it is deepest if it * is the last on its list.) */ diff --git a/link-grammar/dict-common/regex-morph.c b/link-grammar/dict-common/regex-morph.c index 5010ee641..e01881fa1 100644 --- a/link-grammar/dict-common/regex-morph.c +++ b/link-grammar/dict-common/regex-morph.c @@ -347,7 +347,7 @@ static void reg_free(Regex_node *rn) /** * Check the specified capture group of the pattern (if any). - * Return true if no capture group specified if it is valid, + * Return true if no capture group specified or if it is valid, * and -1 on error. 
* * Algo: Append the specified capture group specification to the pattern diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 9feedee2a..e2252d400 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -132,7 +132,7 @@ static bool load_regexes(Dictionary dict, const char *regex_name) } /** - * Read dictionary entries from a wide-character string "input". + * Read dictionary entries from a utf-8 string "input". * All other parts are read from files. */ #define D_DICT 10 diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 98b629c05..b5e456ab1 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -265,7 +265,7 @@ static bool disjuncts_equal(Disjunct * d1, Disjunct * d2, bool ignore_string) /* A shortcut to detect NULL and non-NULL jets on the same side. * Note that it is not possible to share memory between the - * right/left jets due to filed value differences (sharing would + * right/left jets due to field value differences (sharing would * invalidate this check). */ if (d1->left == d2->right) return false; diff --git a/link-grammar/linkage/linkage.c b/link-grammar/linkage/linkage.c index 62f85aa6d..7b61760fb 100644 --- a/link-grammar/linkage/linkage.c +++ b/link-grammar/linkage/linkage.c @@ -127,10 +127,9 @@ static Gword *wordgraph_null_join(Sentence sent, Gword **start, Gword **end) * Add a display wordgraph placeholder for a combined morpheme with links * that are not discardable. * This is needed only when hiding morphology. This is a kind of a hack. - * It it is not deemed nice, the "hide morphology" mode should just not be + * If it is not deemed nice, the "hide morphology" mode should just not be * used for languages with morphemes which have links that cannot be * discarded on that mode (like Hebrew). - * Possible FIXME: Currently it is also used by w/ in English. 
*/ static Gword *wordgraph_link_placeholder(Sentence sent, Gword *w) { diff --git a/link-grammar/memory-pool.c b/link-grammar/memory-pool.c index 5e7895995..4a4d9272e 100644 --- a/link-grammar/memory-pool.c +++ b/link-grammar/memory-pool.c @@ -44,7 +44,7 @@ static size_t align_size(size_t element_size) * Create a memory pool descriptor. * 1. If required, set the allocation size to a power of 2 of the element size. * 2. Save the given parameters in the pool descriptor, to be used by - * pool_alloc(); + * pool_alloc_vec(); * 3. Chain the pool descriptor to the given pool_list, so it can be * automatically freed. [ Not implemented. ] */ diff --git a/link-grammar/parse/count.c b/link-grammar/parse/count.c index 16ddb10cd..cec66d1b0 100644 --- a/link-grammar/parse/count.c +++ b/link-grammar/parse/count.c @@ -31,7 +31,7 @@ /* This file contains the exhaustive search algorithm. */ -#define D_COUNT 5 /* General debug level for this file. */ +#define D_COUNT 5 /* General debug level for this file */ typedef uint8_t null_count_m; /* Storage representation of null_count */ typedef uint8_t WordIdx_m; /* Storage representation of word index */ @@ -571,7 +571,7 @@ static Count_bin table_store(count_context_t *ctxt, if (!USE_TABLE_TRACON) { - // In case a table count already exist, check its consistency. + // In case a table count already exists, check its consistency. Count_bin *e = table_lookup(ctxt, lw, rw, le, re, null_count, NULL); if (e != NULL) { diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index e875c19e8..45ac3db33 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -350,7 +350,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word, pool_reuse(ct.Clause_pool); pool_reuse(ct.Tconnector_pool); - /* We are done, in the concvetional case. */ + /* We are done, in the conventional case.
*/ if (NULL == opts || 0 == opts->max_disjuncts) return dis; /* If there are more than the allowed number of disjuncts, diff --git a/link-grammar/tokenize/word-structures.h b/link-grammar/tokenize/word-structures.h index e6a8007e3..c4efcb75a 100644 --- a/link-grammar/tokenize/word-structures.h +++ b/link-grammar/tokenize/word-structures.h @@ -43,7 +43,7 @@ struct Word_struct X_node * x; /* Sentence starts out with these, */ Disjunct * d; /* eventually these get generated. */ - uint32_t num_disjuncts; /* Length of above */ + uint32_t num_disjuncts; /* Length of above. */ bool optional; /* Linkage is optional. */ diff --git a/link-parser/link-generator.c b/link-parser/link-generator.c index 77d331a6a..350cbf409 100644 --- a/link-parser/link-generator.c +++ b/link-parser/link-generator.c @@ -61,7 +61,7 @@ typedef struct /* Originally, this program used argp, but now it uses getopt in * order to make the porting to MS Windows easy. The original * definitions are still being used here because they are more readable - * and the also allow easy a dynamic generation of an help message. + * and also allow an easy dynamic generation of a help message. * They are converted to getopt options. Only the minimal needed * conversion is done (e.g. flags are not supported). */ From adabe9c3228de1890d9554f2ae666f45d4d795a1 Mon Sep 17 00:00:00 2001 From: ampli Date: Wed, 24 Apr 2024 01:12:02 +0300 Subject: [PATCH 10/11] do_count(): Add USE_PSEUDOCOUNT for debug --- link-grammar/parse/count.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/link-grammar/parse/count.c b/link-grammar/parse/count.c index cec66d1b0..4573dbfcc 100644 --- a/link-grammar/parse/count.c +++ b/link-grammar/parse/count.c @@ -39,8 +39,9 @@ typedef uint8_t WordIdx_m; /* Storage representation of word index */ /* Allow to disable the use of the various caches (for debug). 
*/ const bool ENABLE_WORD_SKIP_VECTOR = true; const bool ENABLE_MATCH_LIST_CACHE = true; -const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches. -const bool USE_TABLE_TRACON = true; // The table is always maintained. +const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches. +const bool USE_TABLE_TRACON = true; // The table is always maintained. +const bool USE_PSEUDOCOUNT = true; // Controls only the non-cyclic solutions. typedef struct Table_tracon_s Table_tracon; struct Table_tracon_s @@ -971,6 +972,7 @@ static Count_bin table_count(count_context_t * ctxt, return *count; } +#ifdef USE_PSEUDOCOUNT /** * Check to see if a parse is even possible, so that we don't later waste * CPU time performing an actual count, only to discover that it is zero. @@ -1007,6 +1009,7 @@ static bool pseudocount(count_context_t * ctxt, Count_bin *count, return false; } +#endif // USE_PSEUDOCOUNT /** * Return the number of optional words strictly between w1 and w2. @@ -1529,18 +1532,23 @@ static Count_bin do_count(const char dlabel[], count_context_t *ctxt, * lookup can be skipped in cases we cannot skip the actual * calculation and a table entry exists. */ Count_bin lcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT }; + Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT }; +#ifdef USE_PSEUDOCOUNT if (Lmatch && !leftpcount) { leftpcount = pseudocount(ctxt, lcount, lw, w, le, d->left, lnull_cnt); } - Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT }; if (Rmatch && !rightpcount && (leftpcount || (le == NULL))) { rightpcount = pseudocount(ctxt, rcount, w, rw, d->right, re, rnull_cnt); } +#else + leftpcount = Lmatch; + rightpcount = Rmatch; +#endif // USE_PSEUDOCOUNT /* Perform a table lookup for a possible cyclic solution. */ if (leftpcount) From c3a00995dae8d9502727902f909d002c22d161f3 Mon Sep 17 00:00:00 2001 From: ampli Date: Mon, 22 Apr 2024 01:01:54 +0300 Subject: [PATCH 11/11] word_new(): Zero all fields ... 
so we will not need to update it if Word_struct gets changed. --- link-grammar/tokenize/tokenize.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/link-grammar/tokenize/tokenize.c b/link-grammar/tokenize/tokenize.c index a3fdca9b6..613a0ab88 100644 --- a/link-grammar/tokenize/tokenize.c +++ b/link-grammar/tokenize/tokenize.c @@ -3124,12 +3124,7 @@ static Word *word_new(Sentence sent) const size_t len = sent->length; sent->word = realloc(sent->word, (len+1)*sizeof(*sent->word)); - sent->word[len].d = NULL; - sent->word[len].x = NULL; - sent->word[len].unsplit_word = NULL; - sent->word[len].alternatives = NULL; - sent->word[len].gwords = NULL; - sent->word[len].optional = false; + memset(&sent->word[len], 0, sizeof(sent->word[0])); sent->length++; return &sent->word[len];