Move logarithm conversion into score()
This commit is contained in:
parent
85038d1f6f
commit
bd014dcc5c
|
@ -73,7 +73,7 @@ impl Segmenter {
|
||||||
// Conditional probability of the word given the previous
|
// Conditional probability of the word given the previous
|
||||||
// word. The technical name is "stupid backoff" and it's
|
// word. The technical name is "stupid backoff" and it's
|
||||||
// not a probability distribution but it works well in practice.
|
// not a probability distribution but it works well in practice.
|
||||||
return (bi / self.bi_total) / (uni / self.uni_total);
|
return ((bi / self.bi_total) / (uni / self.uni_total)).log10();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,7 @@ impl Segmenter {
|
||||||
// to their length, a crucial heuristic.
|
// to their length, a crucial heuristic.
|
||||||
None => 10.0 / (self.uni_total * 10.0f64.powi(word.len() as i32)),
|
None => 10.0 / (self.uni_total * 10.0f64.powi(word.len() as i32)),
|
||||||
}
|
}
|
||||||
|
.log10()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Customize the word length `limit`
|
/// Customize the word length `limit`
|
||||||
|
@ -142,7 +143,7 @@ impl<'a> SegmentState<'a> {
|
||||||
for split in 1..(range.len().min(self.data.limit) + 1) {
|
for split in 1..(range.len().min(self.data.limit) + 1) {
|
||||||
let (start, split, end) = (range.start, range.start + split, range.end);
|
let (start, split, end) = (range.start, range.start + split, range.end);
|
||||||
let previous = previous.clone().map(|range| &self.text[range]);
|
let previous = previous.clone().map(|range| &self.text[range]);
|
||||||
let prefix_score = self.data.score(&self.text[start..split], previous).log10();
|
let prefix_score = self.data.score(&self.text[start..split], previous);
|
||||||
|
|
||||||
let key = (
|
let key = (
|
||||||
(start - self.offset) as u8,
|
(start - self.offset) as u8,
|
||||||
|
|
Loading…
Reference in New Issue