Optimize search algorithm by keeping track of furthest point
This commit is contained in:
parent
0e0ffe201e
commit
40af226a20
38
src/lib.rs
38
src/lib.rs
|
@ -179,6 +179,8 @@ pub struct Search {
|
||||||
nearest: Vec<Candidate>,
|
nearest: Vec<Candidate>,
|
||||||
/// Maximum number of nearest neighbors to retain (`ef` in the paper)
|
/// Maximum number of nearest neighbors to retain (`ef` in the paper)
|
||||||
num: usize,
|
num: usize,
|
||||||
|
/// Current furthest node in `nearest`
|
||||||
|
furthest: OrderedFloat<f32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Search {
|
impl Search {
|
||||||
|
@ -200,32 +202,32 @@ impl Search {
|
||||||
|
|
||||||
let other = &points[pid];
|
let other = &points[pid];
|
||||||
let distance = OrderedFloat::from(point.distance(other));
|
let distance = OrderedFloat::from(point.distance(other));
|
||||||
if self.nearest.len() >= self.num {
|
if self.nearest.len() >= self.num && distance > self.furthest {
|
||||||
if let Some(found) = self.nearest.last() {
|
return;
|
||||||
if distance > found.distance {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.nearest.len() > self.num {
|
if self.nearest.len() > self.num * 2 {
|
||||||
self.nearest.pop();
|
self.nearest.sort_unstable();
|
||||||
|
self.nearest.truncate(self.num);
|
||||||
|
self.furthest = self.nearest.last().unwrap().distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
let new = Candidate { distance, pid };
|
let new = Candidate { distance, pid };
|
||||||
let idx = self.candidates.binary_search(&new).unwrap_or_else(|e| e);
|
self.candidates.push(new);
|
||||||
self.candidates.insert(idx, new);
|
self.nearest.push(new);
|
||||||
|
self.furthest = max(self.furthest, distance);
|
||||||
let idx = self.nearest.binary_search(&new).unwrap_or_else(|e| e);
|
|
||||||
self.nearest.insert(idx, new);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lower the search to the next lower level
|
/// Lower the search to the next lower level
|
||||||
///
|
///
|
||||||
/// Re-initialize the `Search`: `nearest`, the output `W` from the last round, now becomes
|
/// Re-initialize the `Search`: `nearest`, the output `W` from the last round, now becomes
|
||||||
/// the set of enter points, which we use to initialize both `candidates` and `visited`.
|
/// the set of enter points, which we use to initialize both `candidates` and `visited`.
|
||||||
|
///
|
||||||
|
/// Invariant: `nearest` should be sorted before this is called. This is generally the case
|
||||||
|
/// because `Layer::search()` is always called right before calling `cull()`.
|
||||||
fn cull(&mut self) {
|
fn cull(&mut self) {
|
||||||
self.nearest.truncate(self.num); // Limit size of the set of nearest neighbors
|
self.nearest.truncate(self.num); // Limit size of the set of nearest neighbors
|
||||||
|
self.furthest = self.nearest.last().unwrap().distance;
|
||||||
self.candidates.clear();
|
self.candidates.clear();
|
||||||
self.candidates.extend(&self.nearest);
|
self.candidates.extend(&self.nearest);
|
||||||
self.visited.clear();
|
self.visited.clear();
|
||||||
|
@ -240,6 +242,7 @@ impl Default for Search {
|
||||||
candidates: Vec::new(),
|
candidates: Vec::new(),
|
||||||
nearest: Vec::new(),
|
nearest: Vec::new(),
|
||||||
num: 1,
|
num: 1,
|
||||||
|
furthest: OrderedFloat::from(f32::INFINITY),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -328,10 +331,14 @@ trait Layer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for pid in self.nodes()[candidate.pid.0 as usize].nearest_iter().take(num) {
|
let node = &self.nodes()[candidate.pid.0 as usize];
|
||||||
|
for pid in node.nearest_iter().take(num) {
|
||||||
search.push(pid, point, points);
|
search.push(pid, point, points);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
search.nearest.sort_unstable();
|
||||||
|
search.nearest.truncate(search.num);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Insert new node in this layer
|
/// Insert new node in this layer
|
||||||
|
@ -370,8 +377,7 @@ trait Layer {
|
||||||
_ => return Ordering::Greater,
|
_ => return Ordering::Greater,
|
||||||
};
|
};
|
||||||
|
|
||||||
let third_distance = OrderedFloat::from(old.distance(&points[third.0 as usize]));
|
distance.cmp(&old.distance(&points[third.0 as usize]).into())
|
||||||
distance.cmp(&third_distance)
|
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|e| e);
|
.unwrap_or_else(|e| e);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue