Explicitly group state for HNSW construction

This commit is contained in:
Dirkjan Ochtman 2021-05-18 10:21:52 +02:00
parent c8a9529355
commit 20ca8b0f3a
1 changed file with 134 additions and 114 deletions

View File

@@ -280,74 +280,43 @@ where
.map(|_| RwLock::new(ZeroNode::default())) .map(|_| RwLock::new(ZeroNode::default()))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let pool = SearchPool::new(points.len()); let state = Construction {
#[cfg(feature = "indicatif")] zero: zero.as_slice(),
let done = AtomicUsize::new(0); pool: SearchPool::new(points.len()),
for (layer, range) in ranges { top,
let num = if layer.is_zero() { M * 2 } else { M }; points: &points,
heuristic,
ef_construction,
#[cfg(feature = "indicatif")] #[cfg(feature = "indicatif")]
if let Some(bar) = &progress { progress,
#[cfg(feature = "indicatif")]
done: AtomicUsize::new(0),
};
for (layer, range) in ranges {
#[cfg(feature = "indicatif")]
if let Some(bar) = &state.progress {
bar.set_message(format!("Building index (layer {})", layer.0)); bar.set_message(format!("Building index (layer {})", layer.0));
} }
let end = range.end; let end = range.end;
nodes[range].into_par_iter().for_each(|(_, pid)| { nodes[range].into_par_iter().for_each(|(_, pid)| {
let node = zero.as_slice()[*pid].write(); let node = state.zero[*pid].write();
let (mut search, mut insertion) = pool.pop(); state.insert(*pid, node, layer, &layers);
let point = &points.as_slice()[*pid];
search.reset();
search.push(PointId(0), point, &points);
for cur in top.descend() {
search.ef = if cur <= layer { ef_construction } else { 1 };
match cur > layer {
true => {
search.search(point, layers[cur.0 - 1].as_slice(), &points, num);
search.cull();
}
false => {
search.search(point, zero.as_slice(), &points, num);
break;
}
}
}
insertion.ef = ef_construction;
insert(
*pid,
node,
&mut insertion,
&mut search,
&zero,
&points,
&heuristic,
);
#[cfg(feature = "indicatif")]
if let Some(bar) = &progress {
let value = done.fetch_add(1, atomic::Ordering::Relaxed);
if value % 1000 == 0 {
bar.set_position(value as u64);
}
}
pool.push((search, insertion));
}); });
// For layers above the zero layer, make a copy of the current state of the zero layer // For layers above the zero layer, make a copy of the current state of the zero layer
// with `nearest` truncated to `M` elements. // with `nearest` truncated to `M` elements.
if !layer.is_zero() { if !layer.is_zero() {
let mut upper = Vec::new(); (&state.zero[..end])
(&zero[..end])
.into_par_iter() .into_par_iter()
.map(|zero| UpperNode::from_zero(&zero.read())) .map(|zero| UpperNode::from_zero(&zero.read()))
.collect_into_vec(&mut upper); .collect_into_vec(&mut layers[layer.0 - 1]);
layers[layer.0 - 1] = upper;
} }
} }
#[cfg(feature = "indicatif")] #[cfg(feature = "indicatif")]
if let Some(bar) = progress { if let Some(bar) = &state.progress {
bar.finish(); bar.finish();
} }
@@ -408,76 +377,127 @@ where
} }
} }
/// Insert new node in the zero layer struct Construction<'a, P: Point> {
/// zero: &'a [RwLock<ZeroNode>],
/// * `new`: the `PointId` for the new node pool: SearchPool,
/// * `insertion`: a `Search` for shrinking a neighbor set (only used with heuristic neighbor selection) top: LayerId,
/// * `search`: the result for searching potential neighbors for the new node points: &'a [P],
/// * `layer` contains all the nodes at the current layer heuristic: Option<Heuristic>,
/// * `points` is a slice of all the points in the index ef_construction: usize,
/// #[cfg(feature = "indicatif")]
/// Creates the new node, initializing its `nearest` array and updates the nearest neighbors progress: Option<ProgressBar>,
/// for the new node's neighbors if necessary before appending the new node to the layer. #[cfg(feature = "indicatif")]
fn insert<P: Point>( done: AtomicUsize,
new: PointId, }
mut node: parking_lot::RwLockWriteGuard<ZeroNode>,
insertion: &mut Search, impl<'a, P: Point> Construction<'a, P> {
search: &mut Search, /// Insert new node in the zero layer
layer: &[RwLock<ZeroNode>], ///
points: &[P], /// * `new`: the `PointId` for the new node
heuristic: &Option<Heuristic>, /// * `insertion`: a `Search` for shrinking a neighbor set (only used with heuristic neighbor selection)
) { /// * `search`: the result for searching potential neighbors for the new node
let found = match heuristic { /// * `layer` contains all the nodes at the current layer
None => { /// * `points` is a slice of all the points in the index
let candidates = search.select_simple(); ///
&candidates[..Ord::min(candidates.len(), M * 2)] /// Creates the new node, initializing its `nearest` array and updates the nearest neighbors
/// for the new node's neighbors if necessary before appending the new node to the layer.
fn insert(
&self,
new: PointId,
mut node: parking_lot::RwLockWriteGuard<ZeroNode>,
layer: LayerId,
layers: &[Vec<UpperNode>],
) {
let (mut search, mut insertion) = self.pool.pop();
insertion.ef = self.ef_construction;
let point = &self.points[new];
search.reset();
search.push(PointId(0), point, &self.points);
let num = if layer.is_zero() { M * 2 } else { M };
for cur in self.top.descend() {
search.ef = if cur <= layer {
self.ef_construction
} else {
1
};
match cur > layer {
true => {
search.search(point, layers[cur.0 - 1].as_slice(), &self.points, num);
search.cull();
}
false => {
search.search(point, self.zero, &self.points, num);
break;
}
}
} }
Some(heuristic) => search.select_heuristic(&points[new], layer, points, *heuristic),
};
// Just make sure the candidates are all unique let found = match self.heuristic {
debug_assert_eq!( None => {
found.len(), let candidates = search.select_simple();
found.iter().map(|c| c.pid).collect::<HashSet<_>>().len() &candidates[..Ord::min(candidates.len(), M * 2)]
); }
Some(heuristic) => {
search.select_heuristic(&self.points[new], self.zero, self.points, heuristic)
}
};
for (i, candidate) in found.iter().enumerate() { // Just make sure the candidates are all unique
// `candidate` here is the new node's neighbor debug_assert_eq!(
let &Candidate { distance, pid } = candidate; found.len(),
if let Some(heuristic) = heuristic { found.iter().map(|c| c.pid).collect::<HashSet<_>>().len()
let found = insertion.add_neighbor_heuristic( );
new,
layer.nearest_iter(pid),
layer,
&points[pid],
points,
*heuristic,
);
layer[pid] for (i, candidate) in found.iter().enumerate() {
.write() // `candidate` here is the new node's neighbor
.rewrite(found.iter().map(|candidate| candidate.pid)); let &Candidate { distance, pid } = candidate;
node.set(i, pid); if let Some(heuristic) = self.heuristic {
} else { let found = insertion.add_neighbor_heuristic(
// Find the correct index to insert at to keep the neighbor's neighbors sorted new,
let old = &points[pid]; self.zero.nearest_iter(pid),
let idx = layer[pid] self.zero,
.read() &self.points[pid],
.binary_search_by(|third| { self.points,
// `third` here is one of the neighbors of the new node's neighbor. heuristic,
let third = match third { );
pid if pid.is_valid() => *pid,
// if `third` is `None`, our new `node` is always "closer"
_ => return Ordering::Greater,
};
distance.cmp(&old.distance(&points[third]).into()) self.zero[pid]
}) .write()
.unwrap_or_else(|e| e); .rewrite(found.iter().map(|candidate| candidate.pid));
node.set(i, pid);
} else {
// Find the correct index to insert at to keep the neighbor's neighbors sorted
let old = &self.points[pid];
let idx = self.zero[pid]
.read()
.binary_search_by(|third| {
// `third` here is one of the neighbors of the new node's neighbor.
let third = match third {
pid if pid.is_valid() => *pid,
// if `third` is `None`, our new `node` is always "closer"
_ => return Ordering::Greater,
};
layer[pid].write().insert(idx, new); distance.cmp(&old.distance(&self.points[third]).into())
node.set(i, pid); })
.unwrap_or_else(|e| e);
self.zero[pid].write().insert(idx, new);
node.set(i, pid);
}
} }
#[cfg(feature = "indicatif")]
if let Some(bar) = &self.progress {
let value = self.done.fetch_add(1, atomic::Ordering::Relaxed);
if value % 1000 == 0 {
bar.set_position(value as u64);
}
}
self.pool.push((search, insertion));
} }
} }