Simplify API by moving result data into Search
This commit is contained in:
parent
9735e64ee4
commit
d190aa5240
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "instant-segment"
|
name = "instant-segment"
|
||||||
version = "0.5.1"
|
version = "0.6.0"
|
||||||
authors = ["Dirkjan Ochtman <dirkjan@ochtman.nl>"]
|
authors = ["Dirkjan Ochtman <dirkjan@ochtman.nl>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
|
|
|
@ -7,7 +7,6 @@ benchmark_main!(benches);
|
||||||
|
|
||||||
fn short(bench: &mut Bencher) {
|
fn short(bench: &mut Bencher) {
|
||||||
let segmenter = instant_segment::test_data::segmenter();
|
let segmenter = instant_segment::test_data::segmenter();
|
||||||
let mut out = Vec::new();
|
|
||||||
let mut search = instant_segment::Search::default();
|
let mut search = instant_segment::Search::default();
|
||||||
bench.iter(|| segmenter.segment("thisisatest", &mut out, &mut search));
|
bench.iter(|| segmenter.segment("thisisatest", &mut search));
|
||||||
}
|
}
|
||||||
|
|
22
src/lib.rs
22
src/lib.rs
|
@ -51,18 +51,19 @@ impl Segmenter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Appends list of words that is the best segmentation of `text` to `out`
|
/// Segment the text in `input`
|
||||||
///
|
///
|
||||||
/// Requires that the input `text` consists of lowercase ASCII characters only. Otherwise,
|
/// Requires that `input` consists of lowercase ASCII characters only. Otherwise,
|
||||||
/// returns `Err(InvalidCharacter)`. The `search` parameter contains caches that are used for
|
/// returns `Err(InvalidCharacter)`. The `search` parameter contains caches that are used for
|
||||||
/// segmentation; passing it in allows the callers to reuse the cache allocations.
|
/// segmentation; passing it in allows the callers to reuse the cache allocations.
|
||||||
|
///
|
||||||
|
/// The segmentation result can be retrieved through the `Search::split()` method.
|
||||||
pub fn segment(
|
pub fn segment(
|
||||||
&self,
|
&self,
|
||||||
text: &str,
|
input: &str,
|
||||||
out: &mut Vec<String>,
|
|
||||||
search: &mut Search,
|
search: &mut Search,
|
||||||
) -> Result<(), InvalidCharacter> {
|
) -> Result<(), InvalidCharacter> {
|
||||||
SegmentState::new(Ascii::new(text)?, &self, out, search).run();
|
SegmentState::new(Ascii::new(input)?, &self, search).run();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,7 +102,6 @@ impl Segmenter {
|
||||||
struct SegmentState<'a> {
|
struct SegmentState<'a> {
|
||||||
data: &'a Segmenter,
|
data: &'a Segmenter,
|
||||||
text: Ascii<'a>,
|
text: Ascii<'a>,
|
||||||
result: &'a mut Vec<String>,
|
|
||||||
search: &'a mut Search,
|
search: &'a mut Search,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,14 +109,12 @@ impl<'a> SegmentState<'a> {
|
||||||
fn new(
|
fn new(
|
||||||
text: Ascii<'a>,
|
text: Ascii<'a>,
|
||||||
data: &'a Segmenter,
|
data: &'a Segmenter,
|
||||||
result: &'a mut Vec<String>,
|
|
||||||
search: &'a mut Search,
|
search: &'a mut Search,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
search.clear();
|
search.clear();
|
||||||
Self {
|
Self {
|
||||||
data,
|
data,
|
||||||
text,
|
text,
|
||||||
result,
|
|
||||||
search,
|
search,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -134,7 +132,7 @@ impl<'a> SegmentState<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
for &split in splits {
|
for &split in splits {
|
||||||
self.result.push(self.text[start..split].into());
|
self.search.result.push(self.text[start..split].into());
|
||||||
start = split;
|
start = split;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -191,6 +189,7 @@ pub struct Search {
|
||||||
memo: HashMap<MemoKey, (f64, Range<usize>)>,
|
memo: HashMap<MemoKey, (f64, Range<usize>)>,
|
||||||
split_cache: Vec<usize>,
|
split_cache: Vec<usize>,
|
||||||
best: Vec<Vec<usize>>,
|
best: Vec<Vec<usize>>,
|
||||||
|
result: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Search {
|
impl Default for Search {
|
||||||
|
@ -199,6 +198,7 @@ impl Default for Search {
|
||||||
memo: HashMap::default(),
|
memo: HashMap::default(),
|
||||||
split_cache: Vec::with_capacity(32),
|
split_cache: Vec::with_capacity(32),
|
||||||
best: vec![vec![]; SEGMENT_SIZE],
|
best: vec![vec![]; SEGMENT_SIZE],
|
||||||
|
result: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,6 +210,12 @@ impl Search {
|
||||||
for inner in self.best.iter_mut() {
|
for inner in self.best.iter_mut() {
|
||||||
inner.clear();
|
inner.clear();
|
||||||
}
|
}
|
||||||
|
self.result.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the segmentation result
|
||||||
|
pub fn split(&self) -> impl Iterator<Item = &str> {
|
||||||
|
self.result.iter().map(|v| v.as_str())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,16 +10,14 @@ pub fn run(segmenter: &Segmenter) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn assert_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) {
|
pub fn assert_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) {
|
||||||
let mut out = Vec::new();
|
segmenter.segment(&s.join(""), search).unwrap();
|
||||||
segmenter.segment(&s.join(""), &mut out, search).unwrap();
|
let cmp = search.split().collect::<Vec<_>>();
|
||||||
let cmp = out.iter().map(|s| &*s).collect::<Vec<_>>();
|
|
||||||
assert_eq!(cmp, s);
|
assert_eq!(cmp, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn check_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) -> bool {
|
pub fn check_segments(s: &[&str], search: &mut Search, segmenter: &Segmenter) -> bool {
|
||||||
let mut out = Vec::new();
|
match segmenter.segment(&s.join(""), search) {
|
||||||
match segmenter.segment(&s.join(""), &mut out, search) {
|
Ok(()) => s == search.split().collect::<Vec<_>>(),
|
||||||
Ok(()) => s == out.iter().map(|s| &*s).collect::<Vec<_>>(),
|
|
||||||
Err(_) => false,
|
Err(_) => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue