Avoid string allocations for search

This commit is contained in:
Dirkjan Ochtman 2020-05-28 19:56:13 +02:00
parent b9c8402b0c
commit 98a8368be6
1 changed file with 9 additions and 9 deletions

View File

@@ -46,7 +46,7 @@ impl Segmenter {
for word in &window_words[..window_words.len().saturating_sub(5)] { for word in &window_words[..window_words.len().saturating_sub(5)] {
start += word.len(); start += word.len();
words.push(word.into()); words.push((*word).into());
} }
if end == clean.len() { if end == clean.len() {
@@ -54,8 +54,8 @@ impl Segmenter {
} }
} }
let mut window_words = self.search(&clean[start..], "<s>", &mut memo).1; let window_words = self.search(&clean[start..], "<s>", &mut memo).1;
words.append(&mut window_words); words.extend(window_words.into_iter().map(|s| s.to_owned()));
words words
} }
@@ -65,7 +65,7 @@ impl Segmenter {
text: &'b str, text: &'b str,
previous: &str, previous: &str,
memo: &'a mut MemoMap<'b>, memo: &'a mut MemoMap<'b>,
) -> (f64, Vec<String>) { ) -> (f64, Vec<&'b str>) {
if text.is_empty() { if text.is_empty() {
return (0.0, vec![]); return (0.0, vec![]);
} }
@@ -76,11 +76,11 @@ impl Segmenter {
let pair = (suffix, prefix); let pair = (suffix, prefix);
let (suffix_score, suffix_words) = match memo.get(&pair) { let (suffix_score, suffix_words) = match memo.get(&pair) {
Some((score, words)) => (*score, words.clone()), Some((score, words)) => (*score, words.as_slice()),
None => { None => {
let (suffix_score, suffix_words) = self.search(&suffix, prefix, memo); let (suffix_score, suffix_words) = self.search(&suffix, prefix, memo);
memo.insert(pair, (suffix_score, suffix_words.clone())); let value = memo.entry(pair).or_insert((suffix_score, suffix_words));
(suffix_score, suffix_words) (suffix_score, value.1.as_slice())
} }
}; };
@@ -88,7 +88,7 @@ impl Segmenter {
if score > best.0 { if score > best.0 {
best.0 = score; best.0 = score;
best.1.clear(); best.1.clear();
best.1.push(prefix.to_owned()); best.1.push(prefix);
best.1.extend(suffix_words); best.1.extend(suffix_words);
} }
} }
@@ -235,7 +235,7 @@ pub enum ParseError {
String(String), String(String),
} }
type MemoMap<'a> = HashMap<(&'a str, &'a str), (f64, Vec<String>)>; type MemoMap<'a> = HashMap<(&'a str, &'a str), (f64, Vec<&'a str>)>;
const DEFAULT_LIMIT: usize = 24; const DEFAULT_LIMIT: usize = 24;
const DEFAULT_TOTAL: f64 = 1_024_908_267_229.0; const DEFAULT_TOTAL: f64 = 1_024_908_267_229.0;