Make Segmenter::score() slightly more efficient

2020-11-25 13:10:13 +01:00 · 2020-11-25 13:10:13 +01:00 · ea4438f2e8
parent 540348f703
commit ea4438f2e8
1 changed files with 17 additions and 19 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -42,25 +42,23 @@ impl Segmenter {
    }
    fn score(&self, word: &str, previous: Option<&str>) -> f64 {
-        match previous {
+        if let Some(prev) = previous {
-            None => match self.unigrams.get(word) {
+            if let Some(pb) = self.bigrams.get(&(prev.into(), word.into())) {
-                // Probabibility of the given word
+                if self.unigrams.get(prev).is_some() {
-                Some(p) => p / self.total,
+                    // Conditional probability of the word given the previous
-                // Penalize words not found in the unigrams according
+                    // word. The technical name is "stupid backoff" and it's
-                // to their length, a crucial heuristic.
+                    // not a probability distribution but it works well in practice.
-                None => 10.0 / (self.total * 10.0f64.powf(word.len() as f64)),
+                    return pb / self.total / self.score(prev, None);
-            },
+                }
-            Some(prev) => match (
+            }
-                self.bigrams.get(&(prev.into(), word.into())),
+        }
-                self.unigrams.get(prev),
+
-            ) {
+        match self.unigrams.get(word) {
-                // Conditional probability of the word given the previous
+            // Probability of the given word
-                // word. The technical name is "stupid backoff" and it's
+            Some(p) => p / self.total,
-                // not a probability distribution but it works well in practice.
+            // Penalize words not found in the unigrams according
-                (Some(pb), Some(_)) => pb / self.total / self.score(prev, None),
+            // to their length, a crucial heuristic.
-                // Fall back to using the unigram probability
+            None => 10.0 / (self.total * 10.0f64.powf(word.len() as f64)),
-                _ => self.score(word, None),
-            },
        }
    }