Better typed handling of previous word
This commit is contained in:
parent
ea4438f2e8
commit
ead9a3064b
10
src/lib.rs
10
src/lib.rs
|
@ -96,7 +96,7 @@ impl<'a> SegmentState<'a> {
|
||||||
loop {
|
loop {
|
||||||
end = self.text.len().min(end + SEGMENT_SIZE);
|
end = self.text.len().min(end + SEGMENT_SIZE);
|
||||||
let prefix = &self.text[start..end];
|
let prefix = &self.text[start..end];
|
||||||
let window_words = self.search(&prefix, "<s>").1;
|
let window_words = self.search(&prefix, None).1;
|
||||||
|
|
||||||
for word in &window_words[..window_words.len().saturating_sub(5)] {
|
for word in &window_words[..window_words.len().saturating_sub(5)] {
|
||||||
start += word.len();
|
start += word.len();
|
||||||
|
@ -108,26 +108,26 @@ impl<'a> SegmentState<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let window_words = self.search(&self.text[start..], "<s>").1;
|
let window_words = self.search(&self.text[start..], None).1;
|
||||||
self.result
|
self.result
|
||||||
.extend(window_words.into_iter().map(|s| s.into()));
|
.extend(window_words.into_iter().map(|s| s.into()));
|
||||||
}
|
}
|
||||||
|
|
||||||
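/// Segment `text` into words, returning the score and word list of the best split found; `previous` is the word preceding `text` (callers pass `"<s>"` at the start of a window)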
|
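/// Segment `text` into words, returning the score and word list of the best split found; `previous` is the word preceding `text` (`None` when no preceding word is supplied)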
|
||||||
fn search(&mut self, text: &'a str, previous: &str) -> (f64, Vec<&'a str>) {
|
fn search(&mut self, text: &'a str, previous: Option<&str>) -> (f64, Vec<&'a str>) {
|
||||||
if text.is_empty() {
|
if text.is_empty() {
|
||||||
return (0.0, vec![]);
|
return (0.0, vec![]);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut best = (f64::MIN, vec![]);
|
let mut best = (f64::MIN, vec![]);
|
||||||
for (prefix, suffix) in TextDivider::new(text, self.data.limit) {
|
for (prefix, suffix) in TextDivider::new(text, self.data.limit) {
|
||||||
let prefix_score = self.data.score(prefix, Some(previous)).log10();
|
let prefix_score = self.data.score(prefix, previous).log10();
|
||||||
let pair = (suffix, prefix);
|
let pair = (suffix, prefix);
|
||||||
|
|
||||||
let (suffix_score, suffix_words) = match self.memo.get(&pair) {
|
let (suffix_score, suffix_words) = match self.memo.get(&pair) {
|
||||||
Some((score, words)) => (*score, words.as_slice()),
|
Some((score, words)) => (*score, words.as_slice()),
|
||||||
None => {
|
None => {
|
||||||
let (suffix_score, suffix_words) = self.search(&suffix, prefix);
|
let (suffix_score, suffix_words) = self.search(&suffix, Some(prefix));
|
||||||
let value = self
|
let value = self
|
||||||
.memo
|
.memo
|
||||||
.entry(pair)
|
.entry(pair)
|
||||||
|
|
Loading…
Reference in New Issue