Simplify the API some more
This commit is contained in:
parent
4338ff2c0c
commit
9dd1cf089d
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "instant-segment"
|
name = "instant-segment"
|
||||||
version = "0.6.1"
|
version = "0.7.0"
|
||||||
authors = ["Dirkjan Ochtman <dirkjan@ochtman.nl>"]
|
authors = ["Dirkjan Ochtman <dirkjan@ochtman.nl>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
|
|
|
@ -8,5 +8,7 @@ benchmark_main!(benches);
|
||||||
fn short(bench: &mut Bencher) {
|
fn short(bench: &mut Bencher) {
|
||||||
let segmenter = instant_segment::test_data::segmenter();
|
let segmenter = instant_segment::test_data::segmenter();
|
||||||
let mut search = instant_segment::Search::default();
|
let mut search = instant_segment::Search::default();
|
||||||
bench.iter(|| segmenter.segment("thisisatest", &mut search));
|
bench.iter(|| {
|
||||||
|
let _ = segmenter.segment("thisisatest", &mut search);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
15
src/lib.rs
15
src/lib.rs
|
@ -56,11 +56,13 @@ impl Segmenter {
|
||||||
/// Requires that the input `text` consists of lowercase ASCII characters only. Otherwise,
|
/// Requires that the input `text` consists of lowercase ASCII characters only. Otherwise,
|
||||||
/// returns `Err(InvalidCharacter)`. The `search` parameter contains caches that are used during
|
/// returns `Err(InvalidCharacter)`. The `search` parameter contains caches that are used during
|
||||||
/// segmentation; passing it in allows the callers to reuse the cache allocations.
|
/// segmentation; passing it in allows the callers to reuse the cache allocations.
|
||||||
///
|
pub fn segment<'a>(
|
||||||
/// The segmentation result can be retrieved through the `Search::split()` method.
|
&self,
|
||||||
pub fn segment(&self, input: &str, search: &mut Search) -> Result<(), InvalidCharacter> {
|
input: &str,
|
||||||
|
search: &'a mut Search,
|
||||||
|
) -> Result<impl Iterator<Item = &'a str> + ExactSizeIterator, InvalidCharacter> {
|
||||||
SegmentState::new(Ascii::new(input)?, &self, search).run();
|
SegmentState::new(Ascii::new(input)?, &self, search).run();
|
||||||
Ok(())
|
Ok(search.result.iter().map(|v| v.as_str()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn score(&self, word: &str, previous: Option<&str>) -> f64 {
|
fn score(&self, word: &str, previous: Option<&str>) -> f64 {
|
||||||
|
@ -200,11 +202,6 @@ impl Search {
|
||||||
}
|
}
|
||||||
self.result.clear();
|
self.result.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the segmentation result
|
|
||||||
pub fn split(&self) -> impl Iterator<Item = &str> + ExactSizeIterator {
|
|
||||||
self.result.iter().map(|v| v.as_str())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type MemoKey = (Range<usize>, Range<usize>);
|
type MemoKey = (Range<usize>, Range<usize>);
|
||||||
|
|
|
@ -10,14 +10,14 @@ pub fn run(segmenter: &Segmenter) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn assert_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) {
|
pub fn assert_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) {
|
||||||
segmenter.segment(&s.join(""), search).unwrap();
|
let words = segmenter.segment(&s.join(""), search).unwrap();
|
||||||
let cmp = search.split().collect::<Vec<_>>();
|
let cmp = words.collect::<Vec<_>>();
|
||||||
assert_eq!(cmp, s);
|
assert_eq!(cmp, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn check_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) -> bool {
|
pub fn check_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) -> bool {
|
||||||
match segmenter.segment(&s.join(""), search) {
|
match segmenter.segment(&s.join(""), search) {
|
||||||
Ok(()) => s == search.split().collect::<Vec<_>>(),
|
Ok(words) => s == words.collect::<Vec<_>>(),
|
||||||
Err(_) => false,
|
Err(_) => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue