Skip to content

Commit

Permalink
Merge pull request #1506 from ampli/small-fixes
Browse files Browse the repository at this point in the history
Small fixes
  • Loading branch information
linas authored Apr 24, 2024
2 parents 77254ab + c3a0099 commit ea94b09
Show file tree
Hide file tree
Showing 20 changed files with 71 additions and 61 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,4 @@ data/th/old_words/**
# a.wasm is created by "emcc" (in "emconfigure ./configure").
# For now just ignore it.
a.wasm
Session.vim
4 changes: 2 additions & 2 deletions bindings/python-examples/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,12 +1090,12 @@ def test_no_op_resolving(self):

def test_resolving(self):
"""
Test expression resolving using the default headline:4 setting from
Test expression resolving using the default headline:99 setting from
data/en/4.0.dialect.
"""
dictnode = clg.dictionary_lookup_list(self.d._obj, sm('book.n'))
exp_old = dictnode[0].exp
exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:4
exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:99

# Find the 2 locations with a difference when comparing
# exp_old to exp_new and validate them.
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if HAVE_SWIG
# Swig builds these ....
$(built_c_sources) $(built_py_sources): $(SWIG_INCLUDES)
$(built_c_sources) $(built_py_sources): $(SWIG_SOURCES)
$(AM_V_GEN) $(SWIG) -python -py3 -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $<
$(AM_V_GEN) $(SWIG) -python -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $<
else
$(built_c_sources) $(built_py_sources):
touch $(built_c_sources) $(built_py_sources)
Expand Down
2 changes: 1 addition & 1 deletion debug/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ messages.)

Or, in order to display the word array:

`link-parser -v=7 -debug=tokenize.c,print_sentence_word_alternatives`
`link-parser -v=8 -debug=build_sentence_expressions,print_sentence_word_alternatives`

5) Debug post-processing:

Expand Down
6 changes: 3 additions & 3 deletions link-grammar/api-structures.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@
* included tracon sharing) should always be done. And now the overhead
* is negligible.
*
* Note: setting this to non-zero values disables some of the more
* Note: setting this to 254 (MAX_SENTENCE) disables some of the more
* subtle tracon encoding code, and thus can be used to create a
* baseline parse, skipping that code. This can be setin with the
* test_enabled("min-len-encoding") flag (see api.c)
* baseline parse, skipping that code. This can be done using
* -test="min-len-encoding:254" (see sentence.c).
*/
#define SENTENCE_MIN_LENGTH_TRAILING_HASH 0

Expand Down
43 changes: 23 additions & 20 deletions link-grammar/connectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
#ifndef _LINK_GRAMMAR_CONNECTORS_H_
#define _LINK_GRAMMAR_CONNECTORS_H_

#include <ctype.h> // for islower()
#include <ctype.h> // islower()
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h> // for uint8_t
#include <stdint.h> // uint8_t ...

#include "api-types.h"
#include "error.h"
Expand Down Expand Up @@ -100,6 +100,9 @@ struct condesc_struct
};
typedef struct condesc_struct condesc_t;

/* Length-limits for how far connectors can reach out. */
#define UNLIMITED_LEN 255

typedef struct length_limit_def
{
const char *defword;
Expand Down Expand Up @@ -145,17 +148,17 @@ struct Connector_struct
Connector *next;
union
{
const gword_set *originating_gword; /* Used while and after parsing */
const gword_set *originating_gword; /* Used while and after parsing. */
struct
{
int32_t refcount;/* Memory-sharing reference count - for pruning. */
uint16_t exp_pos; /* The position in the originating expression,
currently used only for debugging dict macros. */
bool shallow; /* TRUE if this is a shallow connector.
* A connectors is shallow if it is the first in
* its list on its disjunct. (It is deep if it is
* not the first in its list; it is deepest if it
* is the last on its list.) */
int32_t refcount; /* Memory-sharing reference count - for pruning. */
uint16_t exp_pos; /* The position in the originating expression,
* currently used only for debugging dict macros. */
bool shallow; /* TRUE if this is a shallow connector.
* A connector is shallow if it is the first in
* its list on its disjunct. (It is deep if it is
* not the first in its list; it is deepest if it
* is the last on its list.) */
};
};
};
Expand Down Expand Up @@ -225,9 +228,6 @@ static inline Connector *connector_deepest(const Connector *c)
return (Connector *)c; /* Note: Constness removed. */
}

/* Length-limits for how far connectors can reach out. */
#define UNLIMITED_LEN 255

/**
* Returns TRUE if s and t match according to the connector matching
* rules. The connector strings must be properly formed, starting with
Expand Down Expand Up @@ -383,17 +383,20 @@ static inline size_t pair_hash(int lw, int rw,

/**
* Get the word number of the given tracon.
* c is the leading tracon connector. The word number is extracted from
* the nearest_word of the deepest connector.
* It is extracted from the nearest_word of the deepest connector.
* @param c The leading tracon connector.
* @param dir Direction - 0: left; 1: right.
* @return Sentence word number.
*
* This function depends on setup_connectors() (which initializes
* nearest_word). It should not be called after power_prune() (which
* changes nearest_word).
* nearest_word). It should not be called during or after power_prune()
* (which changes nearest_word).
*
* Note: An alternative for getting the word number of a tracon is to keep
* it in the tracon list table or in a separate array. Both ways add
* noticeable overhead, maybe due to the added CPU cache footprint.
* However, if the word number will be needed after power_prune() there
* will be a need to keep it in an alternative way.
* However, if the need arises for the word number of a tracon during/after
* power_prune(), there will be a need to keep it in an alternative way.
*/
static inline int get_tracon_word_number(Connector *c, int dir)
{
Expand Down
4 changes: 3 additions & 1 deletion link-grammar/dict-common/dict-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#ifndef _LG_DICT_COMMON_H_
#define _LG_DICT_COMMON_H_

#include <limits.h> // INT_MAX

#include "api-types.h" // pp_knowledge
#include "connectors.h" // ConTable
#include "dict-defines.h"
Expand All @@ -34,7 +36,7 @@
* is used. */
static const float UNINITIALIZED_MAX_DISJUNCT_COST = -10000.0f;
static const float DEFAULT_MAX_DISJUNCT_COST = 2.7f;
static const float UNINITIALIZED_MAX_DISJUNCTS = -1;
static const int UNINITIALIZED_MAX_DISJUNCTS = INT_MAX;

/* We need some of these as literal strings. */
#define LG_DICTIONARY_VERSION_NUMBER "dictionary-version-number"
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/dict-common/regex-morph.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ static void reg_free(Regex_node *rn)

/**
* Check the specified capture group of the pattern (if any).
* Return true if no capture group specified if it is valid,
* Return true if no capture group specified or if it is valid,
* and -1 on error.
*
* Algo: Append the specified capture group specification to the pattern
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/dict-file/dictionary.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ static bool load_regexes(Dictionary dict, const char *regex_name)
}

/**
* Read dictionary entries from a wide-character string "input".
* Read dictionary entries from a utf-8 string "input".
* All other parts are read from files.
*/
#define D_DICT 10
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/disjunct-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ static bool disjuncts_equal(Disjunct * d1, Disjunct * d2, bool ignore_string)

/* A shortcut to detect NULL and non-NULL jets on the same side.
* Note that it is not possible to share memory between the
* right/left jets due to filed value differences (sharing would
* right/left jets due to field value differences (sharing would
* invalidate this check). */
if (d1->left == d2->right) return false;

Expand Down
3 changes: 1 addition & 2 deletions link-grammar/linkage/linkage.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,9 @@ static Gword *wordgraph_null_join(Sentence sent, Gword **start, Gword **end)
* Add a display wordgraph placeholder for a combined morpheme with links
* that are not discardable.
* This is needed only when hiding morphology. This is a kind of a hack.
* It it is not deemed nice, the "hide morphology" mode should just not be
* If it is not deemed nice, the "hide morphology" mode should just not be
* used for languages with morphemes which have links that cannot be
* discarded in that mode (like Hebrew).
* Possible FIXME: Currently it is also used by w/ in English.
*/
static Gword *wordgraph_link_placeholder(Sentence sent, Gword *w)
{
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/memory-pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ static size_t align_size(size_t element_size)
* Create a memory pool descriptor.
* 1. If required, set the allocation size to a power of 2 of the element size.
* 2. Save the given parameters in the pool descriptor, to be used by
* pool_alloc();
* pool_alloc_vec();
* 3. Chain the pool descriptor to the given pool_list, so it can be
* automatically freed. [ Not implemented. ]
*/
Expand Down
18 changes: 13 additions & 5 deletions link-grammar/parse/count.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,17 @@

/* This file contains the exhaustive search algorithm. */

#define D_COUNT 5 /* General debug level for this file. */
#define D_COUNT 5 /* General debug level for this file */

typedef uint8_t null_count_m; /* Storage representation of null_count */
typedef uint8_t WordIdx_m; /* Storage representation of word index */

/* Allow disabling the various caches (for debugging). */
const bool ENABLE_WORD_SKIP_VECTOR = true;
const bool ENABLE_MATCH_LIST_CACHE = true;
const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches.
const bool USE_TABLE_TRACON = true; // The table is always maintained.
const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches.
const bool USE_TABLE_TRACON = true; // The table is always maintained.
/* Unlike the const bool switches above, this one is tested with
 * "#ifdef USE_PSEUDOCOUNT", so it must be a preprocessor macro: a const
 * variable is invisible to the preprocessor, and the guarded pseudocount
 * code would be silently compiled out.
 * To disable the pseudocount check, comment out this definition. */
#define USE_PSEUDOCOUNT 1 // Controls only the non-cyclic solutions.

typedef struct Table_tracon_s Table_tracon;
struct Table_tracon_s
Expand Down Expand Up @@ -571,7 +572,7 @@ static Count_bin table_store(count_context_t *ctxt,

if (!USE_TABLE_TRACON)
{
// In case a table count already exist, check its consistency.
// In case a table count already exists, check its consistency.
Count_bin *e = table_lookup(ctxt, lw, rw, le, re, null_count, NULL);
if (e != NULL)
{
Expand Down Expand Up @@ -971,6 +972,7 @@ static Count_bin table_count(count_context_t * ctxt,
return *count;
}

#ifdef USE_PSEUDOCOUNT
/**
* Check to see if a parse is even possible, so that we don't later waste
* CPU time performing an actual count, only to discover that it is zero.
Expand Down Expand Up @@ -1007,6 +1009,7 @@ static bool pseudocount(count_context_t * ctxt, Count_bin *count,

return false;
}
#endif // USE_PSEUDOCOUNT

/**
* Return the number of optional words strictly between w1 and w2.
Expand Down Expand Up @@ -1529,18 +1532,23 @@ static Count_bin do_count(const char dlabel[], count_context_t *ctxt,
* lookup can be skipped in cases we cannot skip the actual
* calculation and a table entry exists. */
Count_bin lcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
#ifdef USE_PSEUDOCOUNT
if (Lmatch && !leftpcount)
{
leftpcount =
pseudocount(ctxt, lcount, lw, w, le, d->left, lnull_cnt);
}

Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
if (Rmatch && !rightpcount && (leftpcount || (le == NULL)))
{
rightpcount =
pseudocount(ctxt, rcount, w, rw, d->right, re, rnull_cnt);
}
#else
leftpcount = Lmatch;
rightpcount = Rmatch;
#endif // USE_PSEUDOCOUNT

/* Perform a table lookup for a possible cyclic solution. */
if (leftpcount)
Expand Down
11 changes: 6 additions & 5 deletions link-grammar/prepare/build-disjuncts.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static void debug_last(Clause *c, Clause **c_last, const char *type)
* Return the number of clauses that would be generated by expanding
* the expression.
*/
static unsigned long count_clauses(Exp *e)
GNUC_UNUSED static unsigned long count_clauses(Exp *e)
{
if (e->type == AND_type)
{
Expand Down Expand Up @@ -350,17 +350,18 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
pool_reuse(ct.Clause_pool);
pool_reuse(ct.Tconnector_pool);

/* We are done, in the concvetional case. */
/* We are done, in the conventional case. */
if (NULL == opts || 0 == opts->max_disjuncts) return dis;

/* If there are more than the allowed number of disjuncts,
* then randomly discard some of them. The discard is done
* with uniform weighting; no attempt to look at the cost
* is made. A fancier algo might selectively choose those
* with lower cost.
* We don't care for now that this doesn't work if discnt > INT_MAX.
*/
unsigned int maxdj = opts->max_disjuncts;
unsigned int discnt = count_disjuncts(dis);
int maxdj = opts->max_disjuncts;
int discnt = count_disjuncts(dis);
if (discnt < maxdj) return dis;

/* If we are here, we need to trim down the list */
Expand All @@ -369,7 +370,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
Disjunct *ktail = dis;
for (Disjunct *d = dis->next; d != NULL; d=d->next)
{
unsigned int pick = rand_r(&rst) % discnt;
int pick = rand_r(&rst) % discnt;
if (pick < maxdj)
{
ktail->next = d;
Expand Down
7 changes: 1 addition & 6 deletions link-grammar/tokenize/tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -3124,12 +3124,7 @@ static Word *word_new(Sentence sent)
const size_t len = sent->length;

sent->word = realloc(sent->word, (len+1)*sizeof(*sent->word));
sent->word[len].d = NULL;
sent->word[len].x = NULL;
sent->word[len].unsplit_word = NULL;
sent->word[len].alternatives = NULL;
sent->word[len].gwords = NULL;
sent->word[len].optional = false;
memset(&sent->word[len], 0, sizeof(sent->word[0]));
sent->length++;

return &sent->word[len];
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/tokenize/word-structures.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct Word_struct

X_node * x; /* Sentence starts out with these, */
Disjunct * d; /* eventually these get generated. */
uint32_t num_disjuncts; /* Length of above */
uint32_t num_disjuncts; /* Length of above. */

bool optional; /* Linkage is optional. */

Expand Down
5 changes: 3 additions & 2 deletions link-grammar/tracon-set.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@

#include "const-prime.h"
#include "connectors.h"
#include "tracon-set.h"
#include "utilities.h"

#ifdef TRACON_SET_DEBUG
#include "disjunct-utils.h" // print_connector_list_str
#endif
#include "tracon-set.h"
#include "utilities.h"

/**
* This is an adaptation of the string_set module for detecting unique
Expand Down
4 changes: 2 additions & 2 deletions link-parser/lg_readline.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ static char *complete_command(const wchar_t *input, size_t len, bool is_help)
const Switch **start = NULL;
const Switch **end;
const Switch **match;
const char *prev;
size_t addlen;
const char *prev = NULL;
size_t addlen = 0;
bool is_assignment = false; /* marking for the help facility */

if ((1 < len) && L'=' == input[len-1] && !is_help)
Expand Down
2 changes: 1 addition & 1 deletion link-parser/link-generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ typedef struct
/* Originally, this program used argp, but now it uses getopt in
* order to make the porting to MS Windows easy. The original
* definitions are still being used here because they are more readable
* and the also allow easy a dynamic generation of an help message.
* and also allow an easy dynamic generation of a help message.
* They are converted to getopt options. Only the minimal needed
* conversion is done (e.g. flags are not supported).
*/
Expand Down
Loading

0 comments on commit ea94b09

Please sign in to comment.