From 78aa3ee250ac94be8a92bedbc2a937cf95f64f96 Mon Sep 17 00:00:00 2001 From: Kevin Brubeck Unhammer Date: Thu, 19 Sep 2024 19:30:36 +0200 Subject: [PATCH] Put casing of &ADDED word on first non-added to the right MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also changes withCasing to apply lowering where casing==lower – should have no effect unless we're changing casing like this, since getCasing only returns lower if there are no uppers. Closes https://github.com/divvun/libdivvun/issues/77 --- src/suggest.cpp | 30 ++++++++++++++++----------- src/suggest.hpp | 13 +++++++++--- test/suggest/expected.move-after.json | 2 +- test/suggest/expected.move.json | 2 +- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/suggest.cpp b/src/suggest.cpp index 9dae47c..174c733 100644 --- a/src/suggest.cpp +++ b/src/suggest.cpp @@ -576,22 +576,11 @@ if(verbose) std::cerr << "\033[1;33mright=\t" << i_right << "\033[0m" << std::en UStringVector reps = {u""}; UStringVector reps_suggestwf = {}; // If we're doing SUGGESTWF, we ignore reps string prev_added_before_blank = ""; + std::optional<Casing> addedcasing = std::nullopt; for (size_t i = i_left; i <= i_right; ++i) { const auto& trg = sentence.cohorts[i]; Casing casing = getCasing(toUtf8(trg.form)); - // std::cerr << "\033[0;35mtrg.added=\t" << trg.added << " i=" << i << "i_left" << i_left<< "\033[0m" << std::endl; - if(trg.added) { - for(size_t j = i; j <= i_right; j++) { - const auto& right_of_trg = sentence.cohorts[j]; - if(!right_of_trg.added) { - // std::cerr << "\033[1;35mright_of_added=\t" << toUtf8(right_of_trg.form) << " j=" << j << "\033[0m" << std::endl; - casing = getCasing(toUtf8(right_of_trg.form)); - break; - } - } - } - if(verbose) std::cerr << "\033[1;34mi=\t" << i << "\033[0m" << std::endl; if(verbose) std::cerr << "\033[1;34mtrg.form=\t'" << toUtf8(trg.form) << "'\033[0m" << std::endl; if(verbose) std::cerr << "\033[1;34mtrg.id=\t" << trg.id << "\033[0m" << 
std::endl; @@ -604,6 +593,23 @@ if(verbose) std::cerr << "\033[1;35mtrg.raw_pre_blank=\t'" << trg.raw_pre_blank if(verbose) std::cerr << "\t\t\033[1;36mdelete=\t" << toUtf8(trg.form) << "\033[0m" << std::endl; } + if(trg.added) { + // This word was added, get casing from a non-added word to the right: + for(size_t j = i; j <= i_right; j++) { + const auto& right_of_trg = sentence.cohorts[j]; + if(!right_of_trg.added) { + addedcasing = casing; + casing = getCasing(toUtf8(right_of_trg.form)); + break; + } + } + } + else if(addedcasing.has_value() && !del) { + // This word was not &ADDED, but is preceded by an added word: + casing = addedcasing.value(); + addedcasing = std::nullopt; + } + bool added_before_blank = false; bool fixedcase = false; bool applies_deletion = trg.id == src.id && src_applies_deletion; diff --git a/src/suggest.hpp b/src/suggest.hpp index e9778c4..b40ec56 100644 --- a/src/suggest.hpp +++ b/src/suggest.hpp @@ -46,6 +46,7 @@ namespace divvun { using std::variant; +using std::optional; using std::pair; using std::string; using std::stringstream; @@ -144,10 +145,16 @@ inline std::string totitle(const string& input) { std::transform(w.begin(), w.begin() + 1, w.begin(), std::towupper); return wideToUtf8(w); } + +inline std::string tolower(const string& input) { + std::wstring w = wideFromUtf8(input); + setlocale(LC_ALL, ""); + std::transform(w.begin(), w.begin() + 1, w.begin(), std::towlower); + return wideToUtf8(w); +} // #endif -inline std::string withCasing( - bool fixedcase, const Casing& inputCasing, const string& input) { +inline std::string withCasing(bool fixedcase, const Casing& inputCasing, const string& input) { if (fixedcase) { return input; } @@ -159,7 +166,7 @@ inline std::string withCasing( case mIxed: return input; case lower: - return input; + return tolower(input); } // should never get to this point return input; diff --git a/test/suggest/expected.move-after.json b/test/suggest/expected.move-after.json index a252a82..dd13e0b 100644 
--- a/test/suggest/expected.move-after.json +++ b/test/suggest/expected.move-after.json @@ -1 +1 @@ -{"errs":[["Nrel mellom nabs",4,20,"syn-abs-wordorder","syn-abs-wordorder",["Nabs Nrel mellom"],"syn-abs-wordorder"]],"text":"før Nrel mellom nabs"} \ No newline at end of file +{"errs":[["Nrel mellom nabs",4,20,"syn-abs-wordorder","syn-abs-wordorder",["Nabs nrel mellom"],"syn-abs-wordorder"]],"text":"før Nrel mellom nabs"} \ No newline at end of file diff --git a/test/suggest/expected.move.json b/test/suggest/expected.move.json index a239e62..8a2714f 100644 --- a/test/suggest/expected.move.json +++ b/test/suggest/expected.move.json @@ -1 +1 @@ -{"errs":[["Nrel mellom nabs",0,16,"syn-abs-wordorder","syn-abs-wordorder",["Nabs Nrel mellom"],"syn-abs-wordorder"]],"text":"Nrel mellom nabs"} \ No newline at end of file +{"errs":[["Nrel mellom nabs",0,16,"syn-abs-wordorder","syn-abs-wordorder",["Nabs nrel mellom"],"syn-abs-wordorder"]],"text":"Nrel mellom nabs"} \ No newline at end of file