Simplify bigram scoring algorithm

This commit is contained in:
Dirkjan Ochtman 2020-12-07 14:24:33 +01:00
parent f26793379b
commit c571996925
1 changed file with 3 additions and 3 deletions

View File

@@ -42,12 +42,12 @@ impl Segmenter {
fn score(&self, word: &str, previous: Option<&str>) -> f64 { fn score(&self, word: &str, previous: Option<&str>) -> f64 {
if let Some(prev) = previous { if let Some(prev) = previous {
if let Some(pb) = self.bigrams.get(&(prev.into(), word.into())) { if let Some(bi) = self.bigrams.get(&(prev.into(), word.into())) {
if self.unigrams.get(prev).is_some() { if let Some(uni) = self.unigrams.get(prev) {
// Conditional probability of the word given the previous // Conditional probability of the word given the previous
// word. The technical name is "stupid backoff" and it's // word. The technical name is "stupid backoff" and it's
// not a probability distribution but it works well in practice. // not a probability distribution but it works well in practice.
return pb / self.total / self.score(prev, None); return (bi / self.total) / (uni / self.total);
} }
} }
} }