Simplify handling of empty tails
This commit is contained in:
parent
ae3896b47b
commit
691ecbc3c6
27
src/lib.rs
27
src/lib.rs
|
@ -99,9 +99,7 @@ impl<'a> SegmentState<'a> {
|
||||||
let (mut start, mut end) = (0, 0);
|
let (mut start, mut end) = (0, 0);
|
||||||
while end < self.text.len() {
|
while end < self.text.len() {
|
||||||
end = self.text.len().min(end + SEGMENT_SIZE);
|
end = self.text.len().min(end + SEGMENT_SIZE);
|
||||||
if !self.search(0, start..end, None).1 {
|
self.search(0, start..end, None);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut splits = &self.best[0][..];
|
let mut splits = &self.best[0][..];
|
||||||
if end < self.text.len() {
|
if end < self.text.len() {
|
||||||
|
@ -116,14 +114,10 @@ impl<'a> SegmentState<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Score `word` in the context of `previous` word
|
/// Score `word` in the context of `previous` word
|
||||||
fn search(
|
fn search(&mut self, level: usize, range: Range<usize>, previous: Option<Range<usize>>) -> f64 {
|
||||||
&mut self,
|
|
||||||
level: usize,
|
|
||||||
range: Range<usize>,
|
|
||||||
previous: Option<Range<usize>>,
|
|
||||||
) -> (f64, bool) {
|
|
||||||
if range.is_empty() {
|
if range.is_empty() {
|
||||||
return (0.0, false);
|
self.best[level].clear();
|
||||||
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut best = f64::MIN;
|
let mut best = f64::MIN;
|
||||||
|
@ -136,16 +130,13 @@ impl<'a> SegmentState<'a> {
|
||||||
let (suffix_score, suffix_splits) = match self.memo.get(&pair) {
|
let (suffix_score, suffix_splits) = match self.memo.get(&pair) {
|
||||||
Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]),
|
Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]),
|
||||||
None => {
|
None => {
|
||||||
let (suffix_score, has_splits) =
|
let suffix_score = self.search(level + 1, split..end, Some(start..split));
|
||||||
self.search(level + 1, split..end, Some(start..split));
|
|
||||||
let start = self.split_cache.len();
|
let start = self.split_cache.len();
|
||||||
self.split_cache.extend(if has_splits {
|
self.split_cache.extend(&self.best[level + 1][..]);
|
||||||
&self.best[level + 1][..]
|
|
||||||
} else {
|
|
||||||
&[]
|
|
||||||
});
|
|
||||||
let end = self.split_cache.len();
|
let end = self.split_cache.len();
|
||||||
self.memo.insert(pair, (suffix_score, start..end));
|
self.memo.insert(pair, (suffix_score, start..end));
|
||||||
|
|
||||||
(suffix_score, &self.split_cache[start..end])
|
(suffix_score, &self.split_cache[start..end])
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -160,7 +151,7 @@ impl<'a> SegmentState<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(best, true)
|
best
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue