Skip to content

Commit

Permalink
Merge pull request #1520 from ampli/mlist-nosort
Browse files Browse the repository at this point in the history
linkage_equiv_p(): Bugfix string comparisons
  • Loading branch information
linas authored May 4, 2024
2 parents ea8c04e + 058eff4 commit 65afe01
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
6 changes: 4 additions & 2 deletions link-grammar/linkage/analyze-linkage.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
* If the GCD is equal to one of them, a pointer to it is returned.
* Otherwise a new string for the GCD is put in the string set.
*
* Note: The head and dependent indicators (lower-case h and d) are
* ignored, as the intersection cannot include them.
* Notes:
* 1. The head and dependent indicators (lower-case h and d) are
* ignored, as the intersection cannot include them.
* 2. The returned string is not always in the same string set.
*/
const char *intersect_strings(String_set *sset, const Connector *c1,
const Connector *c2)
Expand Down
19 changes: 13 additions & 6 deletions link-grammar/parse/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,10 +312,12 @@ static int linkage_equiv_p(Linkage lpv, Linkage lnx)
Link * plk = &lpv->link_array[li];
Link * nlk = &lnx->link_array[li];

// String set guarantees that if the pointer differs,
// then the string does too.
if (plk->link_name != nlk->link_name)
return strcmp(plk->link_name, nlk->link_name);
// Note (see intersect_strings()):
// link_name is not always in the same string set, so differing
// pointers do not imply differing strings. Equal pointers still mean
// equal strings; otherwise the contents must be compared.
if (plk->link_name == nlk->link_name) continue;
int lncmp = strcmp(plk->link_name, nlk->link_name);
if (lncmp) return lncmp;
}

// Compare words. The chosen_disjuncts->word_string is the
Expand All @@ -338,8 +340,13 @@ static int linkage_equiv_p(Linkage lpv, Linkage lnx)
if (NULL == ndj) continue;
return 1;
}
if (pdj->word_string != ndj->word_string)
return strcmp(pdj->word_string, ndj->word_string);

// Note (see build_word_expressions()):
// word_string is not always in the same string set, so differing
// pointers do not imply differing strings. Equal pointers still mean
// equal strings; otherwise the contents must be compared.
if (pdj->word_string == ndj->word_string) continue;
int wscmp = strcmp(pdj->word_string, ndj->word_string);
if (wscmp) return wscmp;
}

// Compare connector types at the link endpoints. If we are here,
Expand Down
17 changes: 10 additions & 7 deletions link-grammar/tokenize/lookup-exprs.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,16 @@ static Dict_node *dictionary_all_categories(Dictionary dict)
/**
* build_word_expressions() -- build list of expressions for a word.
*
* Looks up a word in the dictionary, fetching from it matching words and their
* expressions. Returns NULL if it's not there. If there, it builds the list
* of expressions for the word, and returns a pointer to it.
* The subword of Gword w is used for this lookup, unless the subword is
* explicitly given as parameter s. The subword of Gword w is always used as
* the base word for each expression, and its subscript is the one from the
* dictionary word of the expression.
* Look up the subword of \p w in the dictionary, fetching from it
* matching words and their expressions. Return NULL if it's not there.
* If there, build an X_node list for the word and return it.
*
* For each dictionary word and its expression, set the X_node string
* and exp fields accordingly. However, if a non-NULL \p s is provided,
* use it instead of the dictionary word, and as its subscript, use the
* subscript of the dictionary word. Note that this means that the
* X_node strings (which are later assigned to the disjuncts that are
* derived from them) may be from two different string sets.
*/
static X_node * build_word_expressions(Sentence sent, const Gword *w,
const char *s, Parse_Options opts)
Expand Down

0 comments on commit 65afe01

Please sign in to comment.