Allocate a single Vec to back cached splits
This commit is contained in:
parent 947e003a48
commit 47271ff81e
17
src/lib.rs
17
src/lib.rs
|
@ -1,6 +1,7 @@
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::num::ParseIntError;
|
use std::num::ParseIntError;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
use ahash::AHashMap as HashMap;
|
use ahash::AHashMap as HashMap;
|
||||||
use smartstring::alias::String;
|
use smartstring::alias::String;
|
||||||
|
@ -75,7 +76,8 @@ impl Segmenter {
|
||||||
struct SegmentState<'a> {
|
struct SegmentState<'a> {
|
||||||
data: &'a Segmenter,
|
data: &'a Segmenter,
|
||||||
text: &'a str,
|
text: &'a str,
|
||||||
memo: HashMap<(&'a str, &'a str), (f64, Vec<usize>)>,
|
memo: HashMap<(&'a str, &'a str), (f64, Range<usize>)>,
|
||||||
|
split_cache: Vec<usize>,
|
||||||
result: &'a mut Vec<String>,
|
result: &'a mut Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,6 +87,7 @@ impl<'a> SegmentState<'a> {
|
||||||
data,
|
data,
|
||||||
text,
|
text,
|
||||||
memo: HashMap::new(),
|
memo: HashMap::new(),
|
||||||
|
split_cache: Vec::new(),
|
||||||
result,
|
result,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -127,14 +130,14 @@ impl<'a> SegmentState<'a> {
|
||||||
let pair = (suffix, prefix);
|
let pair = (suffix, prefix);
|
||||||
|
|
||||||
let (suffix_score, suffix_splits) = match self.memo.get(&pair) {
|
let (suffix_score, suffix_splits) = match self.memo.get(&pair) {
|
||||||
Some((score, splits)) => (*score, splits.as_slice()),
|
Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]),
|
||||||
None => {
|
None => {
|
||||||
let (suffix_score, suffix_splits) = self.search(&suffix, Some(prefix));
|
let (suffix_score, suffix_splits) = self.search(&suffix, Some(prefix));
|
||||||
let value = self
|
let start = self.split_cache.len();
|
||||||
.memo
|
self.split_cache.extend(&suffix_splits);
|
||||||
.entry(pair)
|
let end = self.split_cache.len();
|
||||||
.or_insert((suffix_score, suffix_splits));
|
self.memo.insert(pair, (suffix_score, start..end));
|
||||||
(suffix_score, value.1.as_slice())
|
(suffix_score, &self.split_cache[start..end])
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue