Make split values absolute instead of relative

This commit is contained in:
Dirkjan Ochtman 2020-11-26 10:52:14 +01:00
parent b7daaff47a
commit 4be435e0fb
1 changed file with 14 additions and 7 deletions

View File

@ -100,7 +100,7 @@ impl<'a> SegmentState<'a> {
while end < self.text.len() { while end < self.text.len() {
end = self.text.len().min(end + SEGMENT_SIZE); end = self.text.len().min(end + SEGMENT_SIZE);
let prefix = &self.text[start..end]; let prefix = &self.text[start..end];
if !self.search(0, &prefix, None).1 { if !self.search(0, start, &prefix, None).1 {
continue; continue;
} }
@ -109,15 +109,21 @@ impl<'a> SegmentState<'a> {
splits = &splits[..splits.len().saturating_sub(5)]; splits = &splits[..splits.len().saturating_sub(5)];
} }
for split in splits { for &split in splits {
self.result.push(self.text[start..start + split].into()); self.result.push(self.text[start..split].into());
start += split; start = split;
} }
} }
} }
/// Score `word` in the context of `previous` word /// Score `word` in the context of `previous` word
fn search(&mut self, level: usize, text: &'a str, previous: Option<&str>) -> (f64, bool) { fn search(
&mut self,
level: usize,
start: usize,
text: &'a str,
previous: Option<&str>,
) -> (f64, bool) {
if text.is_empty() { if text.is_empty() {
return (0.0, false); return (0.0, false);
} }
@ -131,7 +137,8 @@ impl<'a> SegmentState<'a> {
let (suffix_score, suffix_splits) = match self.memo.get(&pair) { let (suffix_score, suffix_splits) = match self.memo.get(&pair) {
Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]), Some((score, splits)) => (*score, &self.split_cache[splits.start..splits.end]),
None => { None => {
let (suffix_score, has_splits) = self.search(level + 1, &suffix, Some(prefix)); let (suffix_score, has_splits) =
self.search(level + 1, start + split, &suffix, Some(prefix));
let start = self.split_cache.len(); let start = self.split_cache.len();
self.split_cache.extend(if has_splits { self.split_cache.extend(if has_splits {
&self.best[level + 1][..] &self.best[level + 1][..]
@ -149,7 +156,7 @@ impl<'a> SegmentState<'a> {
best = score; best = score;
let splits = &mut self.best[level]; let splits = &mut self.best[level];
splits.clear(); splits.clear();
splits.push(split); splits.push(start + split);
splits.extend(suffix_splits); splits.extend(suffix_splits);
} }
} }