Skip to content

Commit

Permalink
Put casing of &ADDED word on first non-added to the right
Browse files Browse the repository at this point in the history
Also changes withCasing to lowercase the input when casing == lower. This should
have no effect unless casing is being moved between words as described above,
since getCasing only returns lower when the form has no uppercase letters.

Closes #77
  • Loading branch information
unhammer committed Sep 19, 2024
1 parent 9496db4 commit c33489a
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 17 deletions.
30 changes: 18 additions & 12 deletions src/suggest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -576,22 +576,11 @@ if(verbose) std::cerr << "\033[1;33mright=\t" << i_right << "\033[0m" << std::en
UStringVector reps = {u""};
UStringVector reps_suggestwf = {}; // If we're doing SUGGESTWF, we ignore reps
string prev_added_before_blank = "";
std::optional<Casing> addedcasing = std::nullopt;
for (size_t i = i_left; i <= i_right; ++i) {
const auto& trg = sentence.cohorts[i];
Casing casing = getCasing(toUtf8(trg.form));

// std::cerr << "\033[0;35mtrg.added=\t" << trg.added << " i=" << i << "i_left" << i_left<< "\033[0m" << std::endl;
if(trg.added) {
for(size_t j = i; j <= i_right; j++) {
const auto& right_of_trg = sentence.cohorts[j];
if(!right_of_trg.added) {
// std::cerr << "\033[1;35mright_of_added=\t" << toUtf8(right_of_trg.form) << " j=" << j << "\033[0m" << std::endl;
casing = getCasing(toUtf8(right_of_trg.form));
break;
}
}
}

if(verbose) std::cerr << "\033[1;34mi=\t" << i << "\033[0m" << std::endl;
if(verbose) std::cerr << "\033[1;34mtrg.form=\t'" << toUtf8(trg.form) << "'\033[0m" << std::endl;
if(verbose) std::cerr << "\033[1;34mtrg.id=\t" << trg.id << "\033[0m" << std::endl;
Expand All @@ -604,6 +593,23 @@ if(verbose) std::cerr << "\033[1;35mtrg.raw_pre_blank=\t'" << trg.raw_pre_blank
if(verbose) std::cerr << "\t\t\033[1;36mdelete=\t" << toUtf8(trg.form) << "\033[0m" << std::endl;
}

if(trg.added) {
// This word was added, get casing from a non-added word to the right:
for(size_t j = i; j <= i_right; j++) {
const auto& right_of_trg = sentence.cohorts[j];
if(!right_of_trg.added) {
addedcasing = casing;
casing = getCasing(toUtf8(right_of_trg.form));
break;
}
}
}
else if(addedcasing.has_value() && !del) {
// This word was not &ADDED, but is preceded by an added word:
casing = addedcasing.value();
addedcasing = std::nullopt;
}

bool added_before_blank = false;
bool fixedcase = false;
bool applies_deletion = trg.id == src.id && src_applies_deletion;
Expand Down
14 changes: 11 additions & 3 deletions src/suggest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@
# include <hfst/HfstTransducer.h>
// variants:
# include <variant>
# include <optional>

namespace divvun {

using std::variant;
using std::optional;
using std::pair;
using std::string;
using std::stringstream;
Expand Down Expand Up @@ -144,10 +146,16 @@ inline std::string totitle(const string& input) {
std::transform(w.begin(), w.begin() + 1, w.begin(), std::towupper);
return wideToUtf8(w);
}

/// Lowercase the first character of a UTF-8 encoded string.
///
/// Only the first wide character is lowercased; the remainder of the string
/// is left untouched. The input is converted with wideFromUtf8, the first
/// character is passed through std::towlower, and the result is converted
/// back with wideToUtf8.
///
/// @param input UTF-8 encoded text.
/// @return The result of wideToUtf8 on the converted string with its first
///         character lowercased; an empty conversion is passed through
///         unchanged.
inline std::string tolower(const string& input) {
	std::wstring w = wideFromUtf8(input);
	// std::towlower follows the current C locale; select the user's
	// environment locale before converting.
	setlocale(LC_ALL, "");
	// Guard the empty case: forming w.begin() + 1 on an empty string steps
	// past the end iterator, which is undefined behaviour.
	if (!w.empty()) {
		w.front() = static_cast<wchar_t>(std::towlower(w.front()));
	}
	return wideToUtf8(w);
}
// #endif

inline std::string withCasing(
bool fixedcase, const Casing& inputCasing, const string& input) {
inline std::string withCasing(bool fixedcase, const Casing& inputCasing, const string& input) {
if (fixedcase) {
return input;
}
Expand All @@ -159,7 +167,7 @@ inline std::string withCasing(
case mIxed:
return input;
case lower:
return input;
return tolower(input);
}
// should never get to this point
return input;
Expand Down
2 changes: 1 addition & 1 deletion test/suggest/expected.move-after.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"errs":[["Nrel mellom nabs",4,20,"syn-abs-wordorder","syn-abs-wordorder",["Nabs Nrel mellom"],"syn-abs-wordorder"]],"text":"før Nrel mellom nabs"}
{"errs":[["Nrel mellom nabs",4,20,"syn-abs-wordorder","syn-abs-wordorder",["Nabs nrel mellom"],"syn-abs-wordorder"]],"text":"før Nrel mellom nabs"}
2 changes: 1 addition & 1 deletion test/suggest/expected.move.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"errs":[["Nrel mellom nabs",0,16,"syn-abs-wordorder","syn-abs-wordorder",["Nabs Nrel mellom"],"syn-abs-wordorder"]],"text":"Nrel mellom nabs"}
{"errs":[["Nrel mellom nabs",0,16,"syn-abs-wordorder","syn-abs-wordorder",["Nabs nrel mellom"],"syn-abs-wordorder"]],"text":"Nrel mellom nabs"}

0 comments on commit c33489a

Please sign in to comment.