Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[translit] Add using fst:: directives #528

Merged
merged 1 commit into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions nisaba/translit/fst/pairlm_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ using ::fst::MutableArcIterator;
using ::fst::OLabelCompare;
using ::fst::PhiMatcher;
using ::fst::Project;
using ::fst::ProjectType;
using ::fst::Prune;
using ::fst::Push;
using ::fst::REWEIGHT_TO_INITIAL;
Expand All @@ -103,6 +104,7 @@ using ::fst::SymbolTable;
using ::fst::TopSort;
using ::fst::Union;
using ::fst::VectorFst;
using ::fst::kNoSymbol;

namespace nisaba {
namespace translit {
Expand Down Expand Up @@ -578,7 +580,7 @@ StdVectorFst PairLMDecoder::GetWordTransliterations(
} else {
StdVectorFst pair_lattice;
Compose(string_fst, *unicode_to_pair_fst_, &pair_lattice);
Project(&pair_lattice, ::fst::ProjectType::OUTPUT);
Project(&pair_lattice, ProjectType::OUTPUT);
ArcSort(&pair_lattice, OLabelCompare<StdArc>());

// Composes lattice of pair strings with pair language model.
Expand All @@ -600,7 +602,7 @@ StdVectorFst PairLMDecoder::GetWordTransliterations(
if (prune_lattice) {
Prune(&pair_lm_composed_output, /*weight_threshold=*/word_cand_thresh_);
}
Project(&pair_lm_composed_output, ::fst::ProjectType::OUTPUT);
Project(&pair_lm_composed_output, ProjectType::OUTPUT);
return pair_lm_composed_output;
}

Expand Down Expand Up @@ -702,7 +704,7 @@ void PairLMDecoder::AddCandSymArc(absl::string_view new_symbol, double cost,
StdVectorFst *word_transliterations) {
// Look for candidate symbol in the symbol list, add if not there.
int cand_sym = fst_params.cand_syms.Find(new_symbol);
if (cand_sym == ::fst::kNoSymbol) {
if (cand_sym == kNoSymbol) {
cand_sym = fst_params.cand_syms.AddSymbol(new_symbol);
AddToCandsToLMFst(new_symbol, cand_sym, fst_params);
}
Expand Down Expand Up @@ -805,7 +807,7 @@ StdVectorFst PairLMDecoder::TransliterateSegmentedWord(

void PairLMDecoder::ExtractCachedWordTransliterations(
absl::string_view input_word, TranslitContext &fst_params,
::fst::StdVectorFst &cached) {
StdVectorFst &cached) {
mutex_.ReaderLock();
const auto cached_pairs = global_word_transliteration_cache_.at(input_word);
mutex_.ReaderUnlock();
Expand Down Expand Up @@ -1045,7 +1047,7 @@ StdVectorFst PairLMDecoder::ComposeLatticeWithLM(
new PhiMatcher<Matcher<StdFst>>(transliteration_fst, MATCH_NONE, -1),
new PhiMatcher<Matcher<StdFst>>(*lm_fst_, MATCH_INPUT, kPhiSymbol,
true, MATCHER_REWRITE_NEVER))));
Project(&string_lm_composed_fst, ::fst::ProjectType::INPUT);
Project(&string_lm_composed_fst, ProjectType::INPUT);
Connect(&string_lm_composed_fst);
impl::PushInLogSemiring(&string_lm_composed_fst);
TopSort(&string_lm_composed_fst);
Expand Down
12 changes: 6 additions & 6 deletions nisaba/translit/fst/pairlm_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class PairLMDecoder {

// Transliterates a full string, returning an Fst of results.
::fst::StdVectorFst TransliterateString(absl::string_view input_line,
int k_best);
int k_best);

// Prints output from transliteration along with user provided line prefix.
std::string PrintTransliterations(
Expand Down Expand Up @@ -95,8 +95,8 @@ class PairLMDecoder {

// Transliterates a single word, returning an Fst of results.
::fst::StdVectorFst TransliterateWord(absl::string_view input_word,
int k_best,
TranslitContext &fst_params)
int k_best,
TranslitContext &fst_params)
ABSL_LOCKS_EXCLUDED(mutex_);

// Initializes full class for transliteration.
Expand Down Expand Up @@ -130,7 +130,7 @@ class PairLMDecoder {

// Returns a lattice of possible single word transliterations.
::fst::StdVectorFst GetWordTransliterations(absl::string_view input_word,
bool prune_lattice) const;
bool prune_lattice) const;

// Puts arc in Fst that maps from candidate to lm_fst_ symbols.
void AddToCandsToLMFst(absl::string_view new_symbol, int cand_sym,
Expand Down Expand Up @@ -187,8 +187,8 @@ class PairLMDecoder {
::fst::StdVectorFst *transliteration_fst) const;

// Performs a final pruning of transliterations per word position.
void ApplyFinalKBestFilter(int k_best,
::fst::StdVectorFst *transliteration_fst) const;
void ApplyFinalKBestFilter(
int k_best, ::fst::StdVectorFst *transliteration_fst) const;

std::unique_ptr<::fst::StdVectorFst> translit_fst_; // PairLM model.
bool translit_fst_is_transducer_; // Whether PairLM model is transducer.
Expand Down
5 changes: 3 additions & 2 deletions nisaba/translit/fst/wordpiece-segmenter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace fst {
using ::fst::StdArc;
using ::fst::StdVectorFst;
using ::fst::SymbolTable;
using ::fst::kNoSymbol;

namespace impl {
namespace {
Expand Down Expand Up @@ -228,9 +229,9 @@ absl::StatusOr<StdVectorFst> WordpieceSegmenter::GetWordpieceTransducer(
int curr_state = fst.Start();
for (const auto &wordpiece : wordpieces) {
int wordpiece_idx = wordpiece_syms.Find(wordpiece);
if (wordpiece_idx == ::fst::kNoSymbol) {
if (wordpiece_idx == kNoSymbol) {
// Wordpiece is not in vocabulary, hence replaced with <unk>.
if (unk_label == ::fst::kNoSymbol) {
if (unk_label == kNoSymbol) {
return absl::InternalError("Need OOV in wordpiece syms.");
}
wordpiece_idx = unk_label;
Expand Down
Loading