mirror of
https://github.com/instant-labs/instant-segment.git
synced 2025-01-19 15:29:05 +00:00
Simplify bigram scoring algorithm
This commit is contained in:
parent
f26793379b
commit
c571996925
@@ -42,12 +42,12 @@ impl Segmenter {
|
||||
|
||||
fn score(&self, word: &str, previous: Option<&str>) -> f64 {
|
||||
if let Some(prev) = previous {
|
||||
if let Some(pb) = self.bigrams.get(&(prev.into(), word.into())) {
|
||||
if self.unigrams.get(prev).is_some() {
|
||||
if let Some(bi) = self.bigrams.get(&(prev.into(), word.into())) {
|
||||
if let Some(uni) = self.unigrams.get(prev) {
|
||||
// Conditional probability of the word given the previous
|
||||
// word. The technical name is "stupid backoff" and it's
|
||||
// not a probability distribution but it works well in practice.
|
||||
return pb / self.total / self.score(prev, None);
|
||||
return (bi / self.total) / (uni / self.total);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user