From bd014dcc5c3c2306b2244226a5c61980250f5d9b Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Thu, 22 Apr 2021 14:54:54 +0200 Subject: [PATCH] Move logarithm conversion into score() --- instant-segment/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/instant-segment/src/lib.rs b/instant-segment/src/lib.rs index 658141f..74a9976 100644 --- a/instant-segment/src/lib.rs +++ b/instant-segment/src/lib.rs @@ -73,7 +73,7 @@ impl Segmenter { // Conditional probability of the word given the previous // word. The technical name is "stupid backoff" and it's // not a probability distribution but it works well in practice. - return (bi / self.bi_total) / (uni / self.uni_total); + return ((bi / self.bi_total) / (uni / self.uni_total)).log10(); } } } @@ -85,6 +85,7 @@ impl Segmenter { // to their length, a crucial heuristic. None => 10.0 / (self.uni_total * 10.0f64.powi(word.len() as i32)), } + .log10() } /// Customize the word length `limit` @@ -142,7 +143,7 @@ impl<'a> SegmentState<'a> { for split in 1..(range.len().min(self.data.limit) + 1) { let (start, split, end) = (range.start, range.start + split, range.end); let previous = previous.clone().map(|range| &self.text[range]); - let prefix_score = self.data.score(&self.text[start..split], previous).log10(); + let prefix_score = self.data.score(&self.text[start..split], previous); let key = ( (start - self.offset) as u8,