Better typed handling of previous word

This commit is contained in:
Dirkjan Ochtman 2020-11-25 14:12:38 +01:00
parent ea4438f2e8
commit ead9a3064b
1 changed file with 5 additions and 5 deletions

View File

@ -96,7 +96,7 @@ impl<'a> SegmentState<'a> {
loop { loop {
end = self.text.len().min(end + SEGMENT_SIZE); end = self.text.len().min(end + SEGMENT_SIZE);
let prefix = &self.text[start..end]; let prefix = &self.text[start..end];
let window_words = self.search(&prefix, "<s>").1; let window_words = self.search(&prefix, None).1;
for word in &window_words[..window_words.len().saturating_sub(5)] { for word in &window_words[..window_words.len().saturating_sub(5)] {
start += word.len(); start += word.len();
@ -108,26 +108,26 @@ impl<'a> SegmentState<'a> {
} }
} }
let window_words = self.search(&self.text[start..], "<s>").1; let window_words = self.search(&self.text[start..], None).1;
self.result self.result
.extend(window_words.into_iter().map(|s| s.into())); .extend(window_words.into_iter().map(|s| s.into()));
} }
/// Score `word` in the context of `previous` word /// Score `word` in the context of `previous` word
fn search(&mut self, text: &'a str, previous: &str) -> (f64, Vec<&'a str>) { fn search(&mut self, text: &'a str, previous: Option<&str>) -> (f64, Vec<&'a str>) {
if text.is_empty() { if text.is_empty() {
return (0.0, vec![]); return (0.0, vec![]);
} }
let mut best = (f64::MIN, vec![]); let mut best = (f64::MIN, vec![]);
for (prefix, suffix) in TextDivider::new(text, self.data.limit) { for (prefix, suffix) in TextDivider::new(text, self.data.limit) {
let prefix_score = self.data.score(prefix, Some(previous)).log10(); let prefix_score = self.data.score(prefix, previous).log10();
let pair = (suffix, prefix); let pair = (suffix, prefix);
let (suffix_score, suffix_words) = match self.memo.get(&pair) { let (suffix_score, suffix_words) = match self.memo.get(&pair) {
Some((score, words)) => (*score, words.as_slice()), Some((score, words)) => (*score, words.as_slice()),
None => { None => {
let (suffix_score, suffix_words) = self.search(&suffix, prefix); let (suffix_score, suffix_words) = self.search(&suffix, Some(prefix));
let value = self let value = self
.memo .memo
.entry(pair) .entry(pair)