diff --git a/Cargo.toml b/Cargo.toml index 80da68d..0ac9b27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "instant-segment" -version = "0.5.1" +version = "0.6.0" authors = ["Dirkjan Ochtman "] edition = "2018" license = "Apache-2.0" diff --git a/benches/bench.rs b/benches/bench.rs index 0b01fd8..63a844a 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -7,7 +7,6 @@ benchmark_main!(benches); fn short(bench: &mut Bencher) { let segmenter = instant_segment::test_data::segmenter(); - let mut out = Vec::new(); let mut search = instant_segment::Search::default(); - bench.iter(|| segmenter.segment("thisisatest", &mut out, &mut search)); + bench.iter(|| segmenter.segment("thisisatest", &mut search)); } diff --git a/src/lib.rs b/src/lib.rs index 8746e96..f61d026 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,18 +51,19 @@ impl Segmenter { } } - /// Appends list of words that is the best segmentation of `text` to `out` + /// Segment the text in `input` /// /// Requires that the input `text` consists of lowercase ASCII characters only. Otherwise, /// returns `Err(InvalidCharacter)`. The `search` parameter contains caches that are used /// segmentation; passing it in allows the callers to reuse the cache allocations. + /// + /// The segmentation result can be retrieved through the `Search::split()` method. pub fn segment( &self, - text: &str, - out: &mut Vec, + input: &str, search: &mut Search, ) -> Result<(), InvalidCharacter> { - SegmentState::new(Ascii::new(text)?, &self, out, search).run(); + SegmentState::new(Ascii::new(input)?, &self, search).run(); Ok(()) } @@ -101,7 +102,6 @@ impl Segmenter { struct SegmentState<'a> { data: &'a Segmenter, text: Ascii<'a>, - result: &'a mut Vec, search: &'a mut Search, } @@ -109,14 +109,12 @@ impl<'a> SegmentState<'a> { fn new( text: Ascii<'a>, data: &'a Segmenter, - result: &'a mut Vec, search: &'a mut Search, ) -> Self { search.clear(); Self { data, text, - result, search, } } @@ -134,7 +132,7 @@ impl<'a> SegmentState<'a> { } for &split in splits { - self.result.push(self.text[start..split].into()); + self.search.result.push(self.text[start..split].into()); start = split; } } @@ -191,6 +189,7 @@ pub struct Search { memo: HashMap)>, split_cache: Vec, best: Vec>, + result: Vec, } impl Default for Search { @@ -199,6 +198,7 @@ impl Default for Search { memo: HashMap::default(), split_cache: Vec::with_capacity(32), best: vec![vec![]; SEGMENT_SIZE], + result: Vec::new(), } } } @@ -210,6 +210,12 @@ impl Search { for inner in self.best.iter_mut() { inner.clear(); } + self.result.clear(); + } + + /// Get the segmentation result + pub fn split(&self) -> impl Iterator { + self.result.iter().map(|v| v.as_str()) } } diff --git a/src/test_cases.rs b/src/test_cases.rs index ebe3030..50732e3 100644 --- a/src/test_cases.rs +++ b/src/test_cases.rs @@ -10,16 +10,14 @@ pub fn run(segmenter: &Segmenter) { } pub fn assert_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) { - let mut out = Vec::new(); - segmenter.segment(&s.join(""), &mut out, search).unwrap(); - let cmp = out.iter().map(|s| &*s).collect::>(); + segmenter.segment(&s.join(""), search).unwrap(); + let cmp = search.split().collect::>(); assert_eq!(cmp, s); } pub fn check_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) -> bool { - let mut out = Vec::new(); - match segmenter.segment(&s.join(""), &mut out, search) { - Ok(()) => s == out.iter().map(|s| &*s).collect::>(), + match segmenter.segment(&s.join(""), search) { + Ok(()) => s == search.split().collect::>(), Err(_) => false, } }