Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial attempt at efficient range queries across multiple Set objects. #10

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ package.
## Status
The package exposes almost all functionality of the `fst` crate, except for:

- Combining the results of slicing, `search` and `search_re` with set operations
- Using raw transducers


Expand Down Expand Up @@ -83,6 +82,24 @@ m = Map.from_iter( file_iterator('/your/input/file/'), '/your/mmapped/output.fst

# re-open a file you built previously with from_iter()
m = Map(path='/path/to/existing.fst')

# slicing multiple sets efficiently
a = Set.from_iter(["bar", "foo"])
b = Set.from_iter(["baz", "foo"])
list(UnionSet(a, b)['ba':'bb'])
['bar', 'baz']

# searching multiple sets efficiently
a = Set.from_iter(["bar", "foo"])
b = Set.from_iter(["baz", "foo"])
list(UnionSet(a, b).search('ba', 1)
['bar', 'baz']

# searching multiple sets with a regex efficiently
a = Set.from_iter(["bar", "foo"])
b = Set.from_iter(["baz", "foo"])
list(UnionSet(a, b).search_re(r'b\w{2}')
['bar', 'baz']
```


Expand Down
10 changes: 10 additions & 0 deletions rust/rust_fst.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ SetStream* fst_set_stream(Set*);
SetLevStream* fst_set_levsearch(Set*, Levenshtein*);
SetRegexStream* fst_set_regexsearch(Set*, Regex*);
SetOpBuilder* fst_set_make_opbuilder(Set*);
SetOpBuilder* fst_set_make_opbuilder_streambuilder(SetStreamBuilder*);
SetOpBuilder* fst_set_make_opbuilder_levstream(SetLevStream*);
SetOpBuilder* fst_set_make_opbuilder_regexstream(SetRegexStream*);
SetOpBuilder* fst_set_make_opbuilder_union(SetUnion*);
void fst_set_free(Set*);

char* fst_set_stream_next(SetStream*);
Expand All @@ -76,6 +80,10 @@ char* fst_set_regexstream_next(SetRegexStream*);
void fst_set_regexstream_free(SetRegexStream*);

void fst_set_opbuilder_push(SetOpBuilder*, Set*);
void fst_set_opbuilder_push_levstream(SetOpBuilder*, SetLevStream*);
void fst_set_opbuilder_push_regexstream(SetOpBuilder*, SetRegexStream*);
void fst_set_opbuilder_push_streambuilder(SetOpBuilder*, SetStreamBuilder*);
void fst_set_opbuilder_push_union(SetOpBuilder*, SetUnion*);
void fst_set_opbuilder_free(SetOpBuilder*);
SetUnion* fst_set_opbuilder_union(SetOpBuilder*);
SetIntersection* fst_set_opbuilder_intersection(SetOpBuilder*);
Expand All @@ -97,6 +105,8 @@ void fst_set_symmetricdifference_free(SetSymmetricDifference*);

SetStreamBuilder* fst_set_streambuilder_new(Set*);
SetStreamBuilder* fst_set_streambuilder_add_ge(SetStreamBuilder*, char*);
SetStreamBuilder* fst_set_streambuilder_add_gt(SetStreamBuilder*, char*);
SetStreamBuilder* fst_set_streambuilder_add_le(SetStreamBuilder*, char*);
SetStreamBuilder* fst_set_streambuilder_add_lt(SetStreamBuilder*, char*);
SetStream* fst_set_streambuilder_finish(SetStreamBuilder*);

Expand Down
76 changes: 76 additions & 0 deletions rust/src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,73 @@ pub extern "C" fn fst_set_make_opbuilder(ptr: *mut Set) -> *mut set::OpBuilder<'
}
make_free_fn!(fst_set_opbuilder_free, *mut set::OpBuilder);

#[no_mangle]
pub extern "C" fn fst_set_make_opbuilder_levstream(ptr: *mut SetLevStream) -> *mut set::OpBuilder<'static> {
let sls = val_from_ptr!(ptr);
let mut ob = set::OpBuilder::new();
ob.push(sls.into_stream());
to_raw_ptr(ob)
}

#[no_mangle]
pub extern "C" fn fst_set_make_opbuilder_regexstream(ptr: *mut SetRegexStream) -> *mut set::OpBuilder<'static> {
let srs = val_from_ptr!(ptr);
let mut ob = set::OpBuilder::new();
ob.push(srs.into_stream());
to_raw_ptr(ob)
}

#[no_mangle]
pub extern "C" fn fst_set_make_opbuilder_streambuilder(ptr: *mut set::StreamBuilder<'static>) -> *mut set::OpBuilder<'static> {
let sb = val_from_ptr!(ptr);
let mut ob = set::OpBuilder::new();
ob.push(sb.into_stream());
to_raw_ptr(ob)
}

#[no_mangle]
pub extern "C" fn fst_set_make_opbuilder_union(ptr: *mut set::Union<'static>) -> *mut set::OpBuilder<'static> {
let union = val_from_ptr!(ptr);
let mut ob = set::OpBuilder::new();
ob.push(union.into_stream());
to_raw_ptr(ob)
}

#[no_mangle]
pub extern "C" fn fst_set_opbuilder_push(ptr: *mut set::OpBuilder, set_ptr: *mut Set) {
let set = ref_from_ptr!(set_ptr);
let ob = mutref_from_ptr!(ptr);
ob.push(set);
}

#[no_mangle]
pub extern "C" fn fst_set_opbuilder_push_levstream(ptr: *mut set::OpBuilder<'static>, sls_ptr: *mut SetLevStream) {
let sls = val_from_ptr!(sls_ptr);
let ob = mutref_from_ptr!(ptr);
ob.push(sls.into_stream());
}

#[no_mangle]
pub extern "C" fn fst_set_opbuilder_push_regexstream(ptr: *mut set::OpBuilder<'static>, srs_ptr: *mut SetRegexStream) {
let srs = val_from_ptr!(srs_ptr);
let ob = mutref_from_ptr!(ptr);
ob.push(srs.into_stream());
}

#[no_mangle]
pub extern "C" fn fst_set_opbuilder_push_streambuilder(ptr: *mut set::OpBuilder<'static>, sb_ptr: *mut set::StreamBuilder<'static>) {
let sb = val_from_ptr!(sb_ptr);
let ob = mutref_from_ptr!(ptr);
ob.push(sb.into_stream());
}

#[no_mangle]
pub extern "C" fn fst_set_opbuilder_push_union(ptr: *mut set::OpBuilder<'static>, union_ptr: *mut set::Union<'static>) {
let union = val_from_ptr!(union_ptr);
let ob = mutref_from_ptr!(ptr);
ob.push(union.into_stream());
}

#[no_mangle]
pub extern "C" fn fst_set_opbuilder_union(ptr: *mut set::OpBuilder)
-> *mut set::Union {
Expand Down Expand Up @@ -205,6 +265,22 @@ pub extern "C" fn fst_set_streambuilder_add_ge(ptr: *mut set::StreamBuilder<'sta
to_raw_ptr(sb.ge(cstr_to_str(c_bound)))
}

#[no_mangle]
pub extern "C" fn fst_set_streambuilder_add_gt(ptr: *mut set::StreamBuilder<'static>,
c_bound: *mut libc::c_char)
-> *mut set::StreamBuilder<'static> {
let sb = val_from_ptr!(ptr);
to_raw_ptr(sb.gt(cstr_to_str(c_bound)))
}

#[no_mangle]
pub extern "C" fn fst_set_streambuilder_add_le(ptr: *mut set::StreamBuilder<'static>,
c_bound: *mut libc::c_char)
-> *mut set::StreamBuilder<'static> {
let sb = val_from_ptr!(ptr);
to_raw_ptr(sb.le(cstr_to_str(c_bound)))
}

#[no_mangle]
pub extern "C" fn fst_set_streambuilder_add_lt(ptr: *mut set::StreamBuilder<'static>,
c_bound: *mut libc::c_char)
Expand Down
4 changes: 2 additions & 2 deletions rust_fst/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .set import Set
from .set import Set, UnionSet
from .map import Map

__all__ = ["Set", "Map"]
__all__ = ["Set", "UnionSet", "Map"]
Loading