Skip to content

Commit

Permalink
stupid gxhash (replace the gxhash dependency with ahash: GxHashMap -> AHashMap)
Browse files: browse the repository at this point in the history
  • Loading branch information
Lips7 committed Jul 18, 2024
1 parent debe14e commit b4df2bf
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 23 deletions.
63 changes: 52 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion matcher_rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ name = "matcher_rs"
crate-type = ["rlib"]

[dependencies]
ahash = "0.8.11"
# aho-corasick = "1.1.3"
aho-corasick-unsafe = { version = "0.0.4", git = "https://github.com/Lips7/aho-corasick" }
bitflags = { version = "2.6.0", features = ["serde"] }
daachorse = "1.0.0"
fancy-regex = "0.13.0"
gxhash = "3.4.1"
id-set = "0.2.2"
lazy_static = "1.5.0"
nohash-hasher = "0.2.0"
Expand Down
10 changes: 5 additions & 5 deletions matcher_rs/src/process/process_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ use std::borrow::Cow;
use std::fmt::Display;
use std::sync::Arc;

#[cfg(any(feature = "runtime_build", feature = "dfa"))]
use ahash::AHashMap;
use ahash::HashMapExt;
use aho_corasick_unsafe::AhoCorasick;
#[cfg(any(feature = "runtime_build", feature = "dfa"))]
use aho_corasick_unsafe::{AhoCorasickBuilder, AhoCorasickKind, MatchKind as AhoCorasickMatchKind};
Expand All @@ -13,9 +16,6 @@ use daachorse::{
CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder,
MatchKind as DoubleArrayAhoCorasickMatchKind,
};
#[cfg(any(feature = "runtime_build", feature = "dfa"))]
use gxhash::HashMap as GxHashMap;
use gxhash::HashMapExt;
use id_set::IdSet;
use lazy_static::lazy_static;
use nohash_hasher::{IntMap, IsEnabled};
Expand Down Expand Up @@ -405,7 +405,7 @@ pub fn get_process_matcher(

#[cfg(feature = "runtime_build")]
{
let mut process_dict = GxHashMap::default();
let mut process_dict = AHashMap::default();

match process_type_bit {
ProcessType::None => {}
Expand Down Expand Up @@ -531,7 +531,7 @@ pub fn get_process_matcher(
ProcessType::Delete => {
#[cfg(feature = "dfa")]
{
let mut process_dict = GxHashMap::default();
let mut process_dict = AHashMap::default();
process_dict.extend(TEXT_DELETE.trim().lines().map(|pair_str| (pair_str, "")));
process_dict.extend(WHITE_SPACE.iter().map(|&c| (c, "")));
process_dict.retain(|&key, &mut value| key != value);
Expand Down
12 changes: 6 additions & 6 deletions matcher_rs/src/simple_matcher.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::iter;
use std::{borrow::Cow, collections::HashMap};

use ahash::AHashMap;
use aho_corasick_unsafe::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind};
use gxhash::HashMap as GxHashMap;
use id_set::IdSet;
use nohash_hasher::IntMap;
use sonic_rs::{Deserialize, Serialize};
Expand Down Expand Up @@ -211,15 +211,15 @@ impl SimpleMatcher {

let mut ac_dedup_word_id = 0;
let mut ac_dedup_word_list = Vec::new();
let mut ac_dedup_word_id_map = GxHashMap::default();
let mut ac_dedup_word_id_map = AHashMap::default();

for (&process_type, simple_word_map) in process_type_word_map {
let word_process_type = process_type - ProcessType::Delete;
process_type_list.push(process_type);

for (&simple_word_id, simple_word) in simple_word_map {
let mut ac_split_word_and_counter = GxHashMap::default();
let mut ac_split_word_not_counter = GxHashMap::default();
let mut ac_split_word_and_counter = AHashMap::default();
let mut ac_split_word_not_counter = AHashMap::default();

let mut start = 0;
let mut is_and = false;
Expand Down Expand Up @@ -403,7 +403,7 @@ impl<'a> TextMatcherTrait<'a, SimpleResult<'a>> for SimpleMatcher {
&'a self,
processed_text_process_type_set: &[(Cow<'a, str>, IdSet)],
) -> bool {
let mut word_id_split_bit_map = GxHashMap::default();
let mut word_id_split_bit_map = AHashMap::default();
let mut word_id_set = IdSet::default();
let mut not_word_id_set = IdSet::default();

Expand Down Expand Up @@ -535,7 +535,7 @@ impl<'a> TextMatcherTrait<'a, SimpleResult<'a>> for SimpleMatcher {
&'a self,
processed_text_process_type_set: &[(Cow<'a, str>, IdSet)],
) -> Vec<SimpleResult<'a>> {
let mut word_id_split_bit_map = GxHashMap::default();
let mut word_id_split_bit_map = AHashMap::default();
let mut not_word_id_set = IdSet::default();

let processed_times = processed_text_process_type_set.len();
Expand Down

0 comments on commit b4df2bf

Please sign in to comment.