From 85035a9b34fbcb0cae3d7e58934d1dcf5342d228 Mon Sep 17 00:00:00 2001
From: Dirkjan Ochtman
Date: Thu, 22 Apr 2021 14:58:06 +0200
Subject: [PATCH] Add Segmenter::sentence_score() method

---
 instant-segment/src/lib.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/instant-segment/src/lib.rs b/instant-segment/src/lib.rs
index 74a9976..0d1da88 100644
--- a/instant-segment/src/lib.rs
+++ b/instant-segment/src/lib.rs
@@ -66,6 +66,20 @@ impl Segmenter {
         Ok(search.result.iter().map(|v| v.as_str()))
     }
 
+    /// Returns the sentence's score
+    ///
+    /// Returns the relative probability for the given sentence in the corpus represented by
+    /// this `Segmenter`. Will return `None` iff given an empty iterator argument.
+    pub fn sentence_score<'a>(&self, mut words: impl Iterator<Item = &'a str>) -> Option<f64> {
+        let mut prev = words.next()?; // an empty sentence has no score
+        let mut score = self.score(prev, None); // the first word has no predecessor
+        for word in words {
+            score += self.score(word, Some(prev)); // score each word given its predecessor
+            prev = word;
+        }
+        Some(score)
+    }
+
     fn score(&self, word: &str, previous: Option<&str>) -> f64 {
         if let Some(prev) = previous {
             if let Some(bi) = self.bigrams.get(&(prev.into(), word.into())) {