From 646dfc7beb7add0eba32edd11691d7c43fbe3fe3 Mon Sep 17 00:00:00 2001 From: Brady Fomegne Date: Tue, 24 Oct 2023 21:12:45 +0100 Subject: [PATCH] feat(translator): Improve auto-suggestion (#102) --- Cargo.lock | 20 +++-- config/Cargo.toml | 1 + config/src/lib.rs | 27 +++---- engine/translator/Cargo.toml | 5 +- engine/translator/src/lib.rs | 140 ++++++++++++++++++++++++++--------- service/Cargo.toml | 3 +- service/data/test.toml | 2 +- service/src/lib.rs | 16 ++-- 8 files changed, 146 insertions(+), 68 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48a317a..959d59a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,7 @@ dependencies = [ name = "afrim-config" version = "0.4.1" dependencies = [ + "indexmap", "rhai", "serde", "toml", @@ -40,7 +41,9 @@ dependencies = [ name = "afrim-translator" version = "0.0.1" dependencies = [ + "indexmap", "rhai", + "strsim", ] [[package]] @@ -140,9 +143,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.4.6" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d04704f56c2cde07f43e8e2c154b43f216dc5c92fc98ada720177362f953b956" +checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" dependencies = [ "clap_builder", "clap_derive", @@ -150,9 +153,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.6" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e231faeaca65ebd1ea3c737966bf858971cd38c3849107aa3ea7de90a804e45" +checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" dependencies = [ "anstream", "anstyle", @@ -162,9 +165,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.2" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" dependencies = [ "heck", "proc-macro2", @@ -174,9 +177,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" [[package]] name = "cocoa" @@ -397,6 +400,7 @@ checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] diff --git a/config/Cargo.toml b/config/Cargo.toml index d08a442..b38a480 100644 --- a/config/Cargo.toml +++ b/config/Cargo.toml @@ -16,5 +16,6 @@ rhai = ["dep:rhai"] [dependencies] rhai = { version = "1.16.2", optional = true } +indexmap = { version = "2.0.2", features = ["serde"] } serde = { version = "1.0.188", features = ["derive"] } toml = "0.8.2" diff --git a/config/src/lib.rs b/config/src/lib.rs index 49aa1a9..ecf0a35 100644 --- a/config/src/lib.rs +++ b/config/src/lib.rs @@ -3,11 +3,12 @@ #![deny(missing_docs)] +use indexmap::IndexMap; #[cfg(feature = "rhai")] use rhai::{Engine, AST}; use serde::Deserialize; use std::result::Result; -use std::{collections::HashMap, error, fs, path::Path}; +use std::{error, fs, path::Path}; use toml::{self}; /// Hold information about a configuration. @@ -15,10 +16,10 @@ use toml::{self}; pub struct Config { /// The core config. pub core: Option, - data: Option>, + data: Option>, #[cfg(feature = "rhai")] - translators: Option>, - translation: Option>, + translators: Option>, + translation: Option>, } /// Core information about a configuration. @@ -89,7 +90,7 @@ impl Config { .unwrap_or(true); // Data - let mut data = HashMap::new(); + let mut data = IndexMap::new(); config.data.unwrap_or_default().iter().try_for_each( |(key, value)| -> Result<(), Box> { @@ -117,7 +118,7 @@ impl Config { // Translators #[cfg(feature = "rhai")] { - let mut translators = HashMap::new(); + let mut translators = IndexMap::new(); config.translators.unwrap_or_default().iter().try_for_each( |(key, value)| -> Result<(), Box> { @@ -140,7 +141,7 @@ impl Config { } // Translation - let mut translation = HashMap::new(); + let mut translation = IndexMap::new(); config.translation.unwrap_or_default().iter().try_for_each( |(key, value)| -> Result<(), Box> { @@ -174,8 +175,8 @@ impl Config { } /// Extract the data from the configuration. - pub fn extract_data(&self) -> HashMap { - let empty = HashMap::default(); + pub fn extract_data(&self) -> IndexMap { + let empty = IndexMap::default(); self.data .as_ref() @@ -193,8 +194,8 @@ impl Config { /// Extract the translators from the configuration. #[cfg(feature = "rhai")] - pub fn extract_translators(&self) -> Result, Box> { - let empty = HashMap::default(); + pub fn extract_translators(&self) -> Result, Box> { + let empty = IndexMap::default(); let mut engine = Engine::new(); // allow nesting up to 50 layers of expressions/statements @@ -226,8 +227,8 @@ impl Config { } /// Extract the translation from the configuration. - pub fn extract_translation(&self) -> HashMap> { - let empty = HashMap::new(); + pub fn extract_translation(&self) -> IndexMap> { + let empty = IndexMap::new(); self.translation .as_ref() diff --git a/engine/translator/Cargo.toml b/engine/translator/Cargo.toml index 0c4a231..b7387fd 100644 --- a/engine/translator/Cargo.toml +++ b/engine/translator/Cargo.toml @@ -12,8 +12,11 @@ authors = ["Brady Fomegne "] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["rhai"] +default = ["rhai", "strsim"] rhai = ["dep:rhai"] +strsim = ["dep:strsim"] [dependencies] rhai = { version = "1.16.2", optional = true } +indexmap = { version = "2.0.2", features = ["serde"] } +strsim = { version = "0.10.0", optional = true } diff --git a/engine/translator/src/lib.rs b/engine/translator/src/lib.rs index f02555f..91be5b5 100644 --- a/engine/translator/src/lib.rs +++ b/engine/translator/src/lib.rs @@ -5,11 +5,12 @@ //! #[cfg(feature = "rhai")] //! use afrim_translator::Engine; //! use afrim_translator::Translator; -//! use std::collections::HashMap; +//! use indexmap::IndexMap; //! //! // Translation via dictionary -//! let mut dictionary = HashMap::new(); -//! dictionary.insert("halo".to_string(), ["hello".to_string()].to_vec()); +//! let mut dictionary = IndexMap::new(); +//! dictionary.insert("jump".to_string(), ["sauter".to_string()].to_vec()); +//! dictionary.insert("jumper".to_string(), ["sauteur".to_string()].to_vec()); //! dictionary.insert("nihao".to_string(), ["hello".to_string()].to_vec()); //! //! // We build the translator. @@ -19,24 +20,52 @@ //! #[cfg(feature = "rhai")] //! { //! let engine = Engine::new(); -//! let hi = engine.compile(r#" +//! let jump = engine.compile(r#" //! fn translate(input) { -//! if input == "hi" { -//! ["hi", "", "hello", true] +//! if input == "jump" { +//! [input, "", "\n", false] //! } //! } //! "#).unwrap(); -//! translator.register("hi".to_string(), hi); +//! translator.register("jump".to_string(), jump); //! } //! -//! #[cfg(feature = "rhai")] //! assert_eq!( -//! translator.translate("hi"), +//! translator.translate("jump"), +//! vec![ +//! ( +//! "jump".to_owned(), +//! "".to_owned(), +//! vec!["sauter".to_owned()], +//! true +//! ), +//! #[cfg(feature = "rhai")] +//! // Programmable translation +//! ( +//! "jump".to_owned(), +//! "".to_owned(), +//! vec!["\n".to_owned()], +//! false +//! ), +//! // Auto-completion +//! ( +//! "jumper".to_owned(), +//! "er".to_owned(), +//! vec!["sauteur".to_owned()], +//! false +//! ) +//! ] +//! ); +//! +//! // Auto-suggestion / Auto-correction +//! #[cfg(feature = "strsim")] +//! assert_eq!( +//! translator.translate("junp"), //! vec![( -//! "hi".to_owned(), +//! "jump".to_owned(), //! "".to_owned(), -//! vec!["hello".to_owned()], -//! true +//! vec!["sauter".to_owned()], +//! false //! )] //! ); //! ``` @@ -44,28 +73,33 @@ #![deny(missing_docs)] +use indexmap::IndexMap; #[cfg(feature = "rhai")] pub use rhai::Engine; #[cfg(feature = "rhai")] use rhai::{Array, Scope, AST}; -use std::collections::HashMap; +use std::cmp::Ordering; +#[cfg(feature = "strsim")] +use strsim::{self}; + +type P = (String, String, Vec, bool); /// Core structure of the translator. pub struct Translator { - dictionary: HashMap>, + dictionary: IndexMap>, #[cfg(feature = "rhai")] - translators: HashMap, + translators: IndexMap, auto_commit: bool, } impl Translator { /// Initiate a new translator. - pub fn new(dictionary: HashMap>, auto_commit: bool) -> Self { + pub fn new(dictionary: IndexMap>, auto_commit: bool) -> Self { Self { dictionary, auto_commit, #[cfg(feature = "rhai")] - translators: HashMap::default(), + translators: IndexMap::default(), } } @@ -82,30 +116,53 @@ impl Translator { } /// Generate a list of predicates based on the input. - pub fn translate(&self, input: &str) -> Vec<(String, String, Vec, bool)> { + pub fn translate(&self, input: &str) -> Vec

{ #[cfg(feature = "rhai")] let mut scope = Scope::new(); #[cfg(feature = "rhai")] let engine = Engine::new(); - let predicates = self.dictionary.iter().filter_map(|(key, value)| { - if key == input { - Some(( + if input.len() < 2 || input.len() > key.len() { + return None; + }; + + let predicate = (key == input).then_some(( + 1.0, + ( key.to_owned(), "".to_owned(), value.to_owned(), self.auto_commit, + ), + )); + #[cfg(feature = "strsim")] + let predicate = predicate.or_else(|| { + if key.len() == input.len() { + let confidence = strsim::hamming(key.as_ref(), input) + .map(|n| 1.0 - (n as f64 / key.len() as f64)) + .unwrap_or(0.0); + + (confidence > 0.7).then(|| { + ( + confidence, + (key.to_owned(), "".to_owned(), value.to_owned(), false), + ) + }) + } else { + None + } + }); + predicate.or_else(|| { + key.starts_with(input).then_some(( + 0.5, + ( + key.to_owned(), + key.chars().skip(input.len()).collect(), + value.to_owned(), + false, + ), )) - } else if input.len() > 1 && key.starts_with(input) { - Some(( - key.to_owned(), - key.chars().skip(input.len()).collect(), - value.to_owned(), - false, - )) - } else { - None - } + }) }); #[cfg(feature = "rhai")] let predicates = @@ -126,10 +183,18 @@ impl Translator { .collect(); let translated = data[3].clone().as_bool().unwrap(); - (code, remaining_code, texts, translated) + (1.0, (code, remaining_code, texts, translated)) }) })); - predicates.collect() + let mut predicates = predicates.collect::>(); + + // from the best to the worst + predicates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal)); + + predicates + .into_iter() + .map(|(_, predicate)| predicate) + .collect() } } @@ -140,10 +205,10 @@ mod tests { #[cfg(feature = "rhai")] use crate::Engine; use crate::Translator; - use std::collections::HashMap; + use indexmap::IndexMap; // We build the translation - let mut dictionary = HashMap::new(); + let mut dictionary = IndexMap::new(); dictionary.insert("halo".to_string(), ["hello".to_string()].to_vec()); // We config the translator @@ -193,13 +258,14 @@ mod tests { false )] ); + #[cfg(feature = "strsim")] assert_eq!( - translator.translate("halo"), + translator.translate("helo"), vec![( "halo".to_owned(), "".to_owned(), vec!["hello".to_owned()], - true + false )] ); } diff --git a/service/Cargo.toml b/service/Cargo.toml index c471abd..8a174a6 100644 --- a/service/Cargo.toml +++ b/service/Cargo.toml @@ -17,8 +17,9 @@ name = "afrim" path = "./src/main.rs" [features] -default = ["rhai"] +default = ["rhai", "strsim"] rhai = ["afrim-config/rhai", "afrim-translator/rhai"] +strsim = ["afrim-translator/strsim"] [dependencies] clap = { version = "4.4.6", features = ["derive"] } diff --git a/service/data/test.toml b/service/data/test.toml index 3da7d40..9997824 100644 --- a/service/data/test.toml +++ b/service/data/test.toml @@ -27,5 +27,5 @@ hi = "./scripts/hi.rhai" [translation] hello = "hi" heli = "helicopter" -hea = "health" +heal = { value = "health", alias = ["heql"] } vuue = "vʉe" diff --git a/service/src/lib.rs b/service/src/lib.rs index 05ec10d..9c18ad7 100644 --- a/service/src/lib.rs +++ b/service/src/lib.rs @@ -115,8 +115,11 @@ pub fn run(config: Config, mut frontend: impl Frontend) -> Result<(), Box Result<(), Box