Simplify handling of empty tails

This commit is contained in:
Dirkjan Ochtman 2020-11-26 11:20:06 +01:00
parent ae3896b47b
commit 691ecbc3c6
1 changed files with 9 additions and 18 deletions

View File

@ -99,9 +99,7 @@ impl<'a> SegmentState<'a> {
let (mut start, mut end) = (0, 0); let (mut start, mut end) = (0, 0);
while end < self.text.len() { while end < self.text.len() {
end = self.text.len().min(end + SEGMENT_SIZE); end = self.text.len().min(end + SEGMENT_SIZE);
if !self.search(0, start..end, None).1 { self.search(0, start..end, None);
continue;
}
let mut splits = &self.best[0][..]; let mut splits = &self.best[0][..];
if end < self.text.len() { if end < self.text.len() {
@ -116,14 +114,10 @@ impl<'a> SegmentState<'a> {
} }
/// Score `word` in the context of `previous` word /// Score `word` in the context of `previous` word
fn search( fn search(&mut self, level: usize, range: Range<usize>, previous: Option<Range<usize>>) -> f64 {
&mut self,
level: usize,
range: Range<usize>,
previous: Option<Range<usize>>,
) -> (f64, bool) {
if range.is_empty() { if range.is_empty() {
return (0.0, false); self.best[level].clear();
return 0.0;
} }
let mut best = f64::MIN; let mut best = f64::MIN;
@ -136,16 +130,13 @@ impl<'a> SegmentState<'a> {
let (suffix_score, suffix_splits) = match self.memo.get(&pair) { let (suffix_score, suffix_splits) = match self.memo.get(&pair) {
Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]), Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]),
None => { None => {
let (suffix_score, has_splits) = let suffix_score = self.search(level + 1, split..end, Some(start..split));
self.search(level + 1, split..end, Some(start..split));
let start = self.split_cache.len(); let start = self.split_cache.len();
self.split_cache.extend(if has_splits { self.split_cache.extend(&self.best[level + 1][..]);
&self.best[level + 1][..]
} else {
&[]
});
let end = self.split_cache.len(); let end = self.split_cache.len();
self.memo.insert(pair, (suffix_score, start..end)); self.memo.insert(pair, (suffix_score, start..end));
(suffix_score, &self.split_cache[start..end]) (suffix_score, &self.split_cache[start..end])
} }
}; };
@ -160,7 +151,7 @@ impl<'a> SegmentState<'a> {
} }
} }
(best, true) best
} }
} }