Avoid string allocations for search
This commit is contained in:
parent
b9c8402b0c
commit
98a8368be6
18
src/lib.rs
18
src/lib.rs
|
@ -46,7 +46,7 @@ impl Segmenter {
|
||||||
|
|
||||||
for word in &window_words[..window_words.len().saturating_sub(5)] {
|
for word in &window_words[..window_words.len().saturating_sub(5)] {
|
||||||
start += word.len();
|
start += word.len();
|
||||||
words.push(word.into());
|
words.push((*word).into());
|
||||||
}
|
}
|
||||||
|
|
||||||
if end == clean.len() {
|
if end == clean.len() {
|
||||||
|
@ -54,8 +54,8 @@ impl Segmenter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut window_words = self.search(&clean[start..], "<s>", &mut memo).1;
|
let window_words = self.search(&clean[start..], "<s>", &mut memo).1;
|
||||||
words.append(&mut window_words);
|
words.extend(window_words.into_iter().map(|s| s.to_owned()));
|
||||||
words
|
words
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ impl Segmenter {
|
||||||
text: &'b str,
|
text: &'b str,
|
||||||
previous: &str,
|
previous: &str,
|
||||||
memo: &'a mut MemoMap<'b>,
|
memo: &'a mut MemoMap<'b>,
|
||||||
) -> (f64, Vec<String>) {
|
) -> (f64, Vec<&'b str>) {
|
||||||
if text.is_empty() {
|
if text.is_empty() {
|
||||||
return (0.0, vec![]);
|
return (0.0, vec![]);
|
||||||
}
|
}
|
||||||
|
@ -76,11 +76,11 @@ impl Segmenter {
|
||||||
let pair = (suffix, prefix);
|
let pair = (suffix, prefix);
|
||||||
|
|
||||||
let (suffix_score, suffix_words) = match memo.get(&pair) {
|
let (suffix_score, suffix_words) = match memo.get(&pair) {
|
||||||
Some((score, words)) => (*score, words.clone()),
|
Some((score, words)) => (*score, words.as_slice()),
|
||||||
None => {
|
None => {
|
||||||
let (suffix_score, suffix_words) = self.search(&suffix, prefix, memo);
|
let (suffix_score, suffix_words) = self.search(&suffix, prefix, memo);
|
||||||
memo.insert(pair, (suffix_score, suffix_words.clone()));
|
let value = memo.entry(pair).or_insert((suffix_score, suffix_words));
|
||||||
(suffix_score, suffix_words)
|
(suffix_score, value.1.as_slice())
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -88,7 +88,7 @@ impl Segmenter {
|
||||||
if score > best.0 {
|
if score > best.0 {
|
||||||
best.0 = score;
|
best.0 = score;
|
||||||
best.1.clear();
|
best.1.clear();
|
||||||
best.1.push(prefix.to_owned());
|
best.1.push(prefix);
|
||||||
best.1.extend(suffix_words);
|
best.1.extend(suffix_words);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -235,7 +235,7 @@ pub enum ParseError {
|
||||||
String(String),
|
String(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
type MemoMap<'a> = HashMap<(&'a str, &'a str), (f64, Vec<String>)>;
|
type MemoMap<'a> = HashMap<(&'a str, &'a str), (f64, Vec<&'a str>)>;
|
||||||
|
|
||||||
const DEFAULT_LIMIT: usize = 24;
|
const DEFAULT_LIMIT: usize = 24;
|
||||||
const DEFAULT_TOTAL: f64 = 1_024_908_267_229.0;
|
const DEFAULT_TOTAL: f64 = 1_024_908_267_229.0;
|
||||||
|
|
Loading…
Reference in New Issue