Optimize search algorithm by keeping track of furthest point

This commit is contained in:
Dirkjan Ochtman 2020-12-14 16:52:44 +01:00
parent 0e0ffe201e
commit 40af226a20
1 changed files with 22 additions and 16 deletions

View File

@ -179,6 +179,8 @@ pub struct Search {
nearest: Vec<Candidate>, nearest: Vec<Candidate>,
/// Maximum number of nearest neighbors to retain (`ef` in the paper) /// Maximum number of nearest neighbors to retain (`ef` in the paper)
num: usize, num: usize,
/// Current furthest node in `nearest`
furthest: OrderedFloat<f32>,
} }
impl Search { impl Search {
@ -200,32 +202,32 @@ impl Search {
let other = &points[pid]; let other = &points[pid];
let distance = OrderedFloat::from(point.distance(other)); let distance = OrderedFloat::from(point.distance(other));
if self.nearest.len() >= self.num { if self.nearest.len() >= self.num && distance > self.furthest {
if let Some(found) = self.nearest.last() { return;
if distance > found.distance {
return;
}
}
} }
if self.nearest.len() > self.num { if self.nearest.len() > self.num * 2 {
self.nearest.pop(); self.nearest.sort_unstable();
self.nearest.truncate(self.num);
self.furthest = self.nearest.last().unwrap().distance;
} }
let new = Candidate { distance, pid }; let new = Candidate { distance, pid };
let idx = self.candidates.binary_search(&new).unwrap_or_else(|e| e); self.candidates.push(new);
self.candidates.insert(idx, new); self.nearest.push(new);
self.furthest = max(self.furthest, distance);
let idx = self.nearest.binary_search(&new).unwrap_or_else(|e| e);
self.nearest.insert(idx, new);
} }
/// Lower the search to the next lower level /// Lower the search to the next lower level
/// ///
/// Re-initialize the `Search`: `nearest`, the output `W` from the last round, now becomes /// Re-initialize the `Search`: `nearest`, the output `W` from the last round, now becomes
/// the set of enter points, which we use to initialize both `candidates` and `visited`. /// the set of enter points, which we use to initialize both `candidates` and `visited`.
///
/// Invariant: `nearest` should be sorted before this is called. This is generally the case
/// because `Layer::search()` is always called right before calling `cull()`.
fn cull(&mut self) { fn cull(&mut self) {
self.nearest.truncate(self.num); // Limit size of the set of nearest neighbors self.nearest.truncate(self.num); // Limit size of the set of nearest neighbors
self.furthest = self.nearest.last().unwrap().distance;
self.candidates.clear(); self.candidates.clear();
self.candidates.extend(&self.nearest); self.candidates.extend(&self.nearest);
self.visited.clear(); self.visited.clear();
@ -240,6 +242,7 @@ impl Default for Search {
candidates: Vec::new(), candidates: Vec::new(),
nearest: Vec::new(), nearest: Vec::new(),
num: 1, num: 1,
furthest: OrderedFloat::from(f32::INFINITY),
} }
} }
} }
@ -328,10 +331,14 @@ trait Layer {
} }
} }
for pid in self.nodes()[candidate.pid.0 as usize].nearest_iter().take(num) { let node = &self.nodes()[candidate.pid.0 as usize];
for pid in node.nearest_iter().take(num) {
search.push(pid, point, points); search.push(pid, point, points);
} }
} }
search.nearest.sort_unstable();
search.nearest.truncate(search.num);
} }
/// Insert new node in this layer /// Insert new node in this layer
@ -370,8 +377,7 @@ trait Layer {
_ => return Ordering::Greater, _ => return Ordering::Greater,
}; };
let third_distance = OrderedFloat::from(old.distance(&points[third.0 as usize])); distance.cmp(&old.distance(&points[third.0 as usize]).into())
distance.cmp(&third_distance)
}) })
.unwrap_or_else(|e| e); .unwrap_or_else(|e| e);