Make Segmenter::score() slightly more efficient
parent 540348f703
commit ea4438f2e8
src/lib.rs: 28 changed lines (+13, −15)
```diff
@@ -42,25 +42,23 @@ impl Segmenter {
     }
 
     fn score(&self, word: &str, previous: Option<&str>) -> f64 {
-        match previous {
-            None => match self.unigrams.get(word) {
-            // Probabibility of the given word
+        if let Some(prev) = previous {
+            if let Some(pb) = self.bigrams.get(&(prev.into(), word.into())) {
+                if self.unigrams.get(prev).is_some() {
+                    // Conditional probability of the word given the previous
+                    // word. The technical name is "stupid backoff" and it's
+                    // not a probability distribution but it works well in practice.
+                    return pb / self.total / self.score(prev, None);
+                }
+            }
+        }
+
+        match self.unigrams.get(word) {
+            // Probability of the given word
             Some(p) => p / self.total,
             // Penalize words not found in the unigrams according
             // to their length, a crucial heuristic.
             None => 10.0 / (self.total * 10.0f64.powf(word.len() as f64)),
-            },
-            Some(prev) => match (
-                self.bigrams.get(&(prev.into(), word.into())),
-                self.unigrams.get(prev),
-            ) {
-                // Conditional probability of the word given the previous
-                // word. The technical name is "stupid backoff" and it's
-                // not a probability distribution but it works well in practice.
-                (Some(pb), Some(_)) => pb / self.total / self.score(prev, None),
-                // Fall back to using the unigram probability
-                _ => self.score(word, None),
-            },
         }
     }
 }
```
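The efficiency gain: the old `Some(prev)` arm eagerly performed both map lookups, `self.bigrams.get(..)` and `self.unigrams.get(prev)`, on every call that had a previous word, just to build a tuple to match on. The rewritten early-return version only consults `unigrams` after the bigram lookup has already succeeded, and otherwise falls through to the plain unigram scoring. Below is a minimal, runnable sketch of the new logic under stated assumptions: the real `Segmenter`'s field types, construction, and counts are not part of this hunk, so the `String`-keyed `HashMap` fields and the toy numbers here are made up purely for illustration.

```rust
use std::collections::HashMap;

// Hypothetical stand-in for the real Segmenter; the actual field types
// are not visible in this hunk, so plain String-keyed maps are assumed.
struct Segmenter {
    unigrams: HashMap<String, f64>,
    bigrams: HashMap<(String, String), f64>,
    total: f64,
}

impl Segmenter {
    fn score(&self, word: &str, previous: Option<&str>) -> f64 {
        if let Some(prev) = previous {
            // The previous word's unigram count is only looked up once the
            // bigram lookup has succeeded; the old code always did both.
            if let Some(pb) = self.bigrams.get(&(prev.into(), word.into())) {
                if self.unigrams.get(prev).is_some() {
                    // "Stupid backoff": divide the bigram score by the
                    // previous word's unigram score.
                    return pb / self.total / self.score(prev, None);
                }
            }
        }

        match self.unigrams.get(word) {
            // Probability of the given word
            Some(p) => p / self.total,
            // Penalize words not found in the unigrams according
            // to their length, a crucial heuristic.
            None => 10.0 / (self.total * 10.0f64.powf(word.len() as f64)),
        }
    }
}

fn main() {
    let mut unigrams = HashMap::new();
    unigrams.insert("new".to_string(), 200.0);
    unigrams.insert("york".to_string(), 100.0);
    let mut bigrams = HashMap::new();
    bigrams.insert(("new".to_string(), "york".to_string()), 80.0);
    let seg = Segmenter { unigrams, bigrams, total: 1000.0 };

    // Bigram path: (80 / 1000) / (200 / 1000) = 0.4
    println!("{}", seg.score("york", Some("new")));
    // Unigram path: 100 / 1000 = 0.1
    println!("{}", seg.score("york", None));
    // Unknown-word path: 10 / (1000 * 10^3) = 0.00001
    println!("{}", seg.score("zzz", None));
}
```

With these made-up counts the sketch prints 0.4, 0.1, and 0.00001: the bigram path divides the pair score by the previous word's unigram score, and unknown words are penalized exponentially in their length, exactly as the comments in the diff describe.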