Make number of layers and sizes deterministic
This commit is contained in:
parent
a99c61fee5
commit
2f7ddad3a2
72
src/lib.rs
72
src/lib.rs
|
@ -9,7 +9,7 @@ use indicatif::ProgressBar;
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use parking_lot::{Mutex, RwLock};
|
use parking_lot::{Mutex, RwLock};
|
||||||
use rand::rngs::SmallRng;
|
use rand::rngs::SmallRng;
|
||||||
use rand::SeedableRng;
|
use rand::{Rng, SeedableRng};
|
||||||
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
@ -155,49 +155,52 @@ where
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine the number and size of layers.
|
||||||
|
|
||||||
|
let mut sizes = Vec::new();
|
||||||
|
let mut num = points.len();
|
||||||
|
loop {
|
||||||
|
let next = (num as f32 * ml) as usize;
|
||||||
|
if next < M {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sizes.push((num - next, num));
|
||||||
|
num = next;
|
||||||
|
}
|
||||||
|
sizes.push((num, num));
|
||||||
|
sizes.reverse();
|
||||||
|
|
||||||
// Give all points a random layer and sort the list of nodes by descending order for
|
// Give all points a random layer and sort the list of nodes by descending order for
|
||||||
// construction. This allows us to copy higher layers to lower layers as construction
|
// construction. This allows us to copy higher layers to lower layers as construction
|
||||||
// progresses, while preserving randomness in each point's layer and insertion order.
|
// progresses, while preserving randomness in each point's layer and insertion order.
|
||||||
|
|
||||||
assert!(points.len() < u32::MAX as usize);
|
assert!(points.len() < u32::MAX as usize);
|
||||||
let mut nodes = (0..points.len())
|
let mut shuffled = (0..points.len())
|
||||||
.map(|i| (LayerId::random(ml, &mut rng), i))
|
.map(|i| (PointId(rng.gen_range(0..points.len() as u32)), i))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
nodes.sort_unstable_by_key(|&n| Reverse(n));
|
shuffled.sort_unstable();
|
||||||
|
|
||||||
// Find out how many layers are needed, so that we can discard empty layers in the next
|
|
||||||
// step. Since layer IDs are randomly generated, there might be big gaps.
|
|
||||||
|
|
||||||
let (mut num_layers, mut prev) = (1, nodes[0].0);
|
|
||||||
for (layer, _) in nodes.iter() {
|
|
||||||
if *layer != prev {
|
|
||||||
num_layers += 1;
|
|
||||||
prev = *layer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort the original `points` in layer order.
|
|
||||||
// TODO: maybe optimize this? https://crates.io/crates/permutation
|
|
||||||
|
|
||||||
let mut cur_layer = LayerId(num_layers - 1);
|
|
||||||
let mut prev_layer = nodes[0].0;
|
|
||||||
let mut new_points = Vec::with_capacity(points.len());
|
let mut new_points = Vec::with_capacity(points.len());
|
||||||
let mut new_nodes = Vec::with_capacity(points.len());
|
let mut new_nodes = Vec::with_capacity(points.len());
|
||||||
let mut out = vec![INVALID; points.len()];
|
let mut out = vec![INVALID; points.len()];
|
||||||
for (i, &(layer, idx)) in nodes.iter().enumerate() {
|
for (_, idx) in shuffled {
|
||||||
if prev_layer != layer {
|
let pid = PointId(new_nodes.len() as u32);
|
||||||
cur_layer = LayerId(cur_layer.0 - 1);
|
let layer = sizes
|
||||||
prev_layer = layer;
|
.iter()
|
||||||
}
|
.enumerate()
|
||||||
|
.find_map(|(i, &size)| match (pid.0 as usize) < size.1 {
|
||||||
|
true => Some(i),
|
||||||
|
false => None,
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let pid = PointId(i as u32);
|
|
||||||
new_points.push(points[idx].clone());
|
new_points.push(points[idx].clone());
|
||||||
new_nodes.push((cur_layer, pid));
|
new_nodes.push((LayerId(sizes.len() - layer - 1), pid));
|
||||||
out[idx] = pid;
|
out[idx] = pid;
|
||||||
}
|
}
|
||||||
let (points, nodes) = (new_points, new_nodes);
|
let (points, nodes) = (new_points, new_nodes);
|
||||||
debug_assert_eq!(nodes.last().unwrap().0, LayerId(0));
|
debug_assert_eq!(nodes.last().unwrap().0, LayerId(0));
|
||||||
debug_assert_eq!(nodes.first().unwrap().0, LayerId(num_layers - 1));
|
debug_assert_eq!(nodes.first().unwrap().0, LayerId(sizes.len() - 1));
|
||||||
|
|
||||||
// The layer from the first node is our top layer, or the zero layer if we have no nodes.
|
// The layer from the first node is our top layer, or the zero layer if we have no nodes.
|
||||||
|
|
||||||
|
@ -209,17 +212,12 @@ where
|
||||||
// Figure out how many nodes will go on each layer. This helps us allocate memory capacity
|
// Figure out how many nodes will go on each layer. This helps us allocate memory capacity
|
||||||
// for each layer in advance, and also helps enable batch insertion of points.
|
// for each layer in advance, and also helps enable batch insertion of points.
|
||||||
|
|
||||||
let mut sizes = vec![0; top.0 + 1];
|
let num_layers = sizes.len();
|
||||||
for (layer, _) in nodes.iter().copied() {
|
|
||||||
sizes[layer.0] += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut start = 0;
|
|
||||||
let mut ranges = Vec::with_capacity(top.0);
|
let mut ranges = Vec::with_capacity(top.0);
|
||||||
for (i, size) in sizes.into_iter().enumerate().rev() {
|
for (i, (size, cumulative)) in sizes.into_iter().enumerate() {
|
||||||
|
let start = cumulative - size;
|
||||||
// Skip the first point, since we insert the enter point separately
|
// Skip the first point, since we insert the enter point separately
|
||||||
ranges.push((LayerId(i), max(start, 1)..start + size));
|
ranges.push((LayerId(num_layers - i - 1), max(start, 1)..cumulative));
|
||||||
start += size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert the first point so that we have an enter point to start searches with.
|
// Insert the first point so that we have an enter point to start searches with.
|
||||||
|
|
|
@ -3,8 +3,6 @@ use std::ops::{Deref, Index};
|
||||||
|
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
|
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
|
||||||
use rand::rngs::SmallRng;
|
|
||||||
use rand::Rng;
|
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
#[cfg(feature = "serde-big-array")]
|
#[cfg(feature = "serde-big-array")]
|
||||||
|
@ -200,11 +198,6 @@ where
|
||||||
pub(crate) struct LayerId(pub usize);
|
pub(crate) struct LayerId(pub usize);
|
||||||
|
|
||||||
impl LayerId {
|
impl LayerId {
|
||||||
pub(crate) fn random(ml: f32, rng: &mut SmallRng) -> Self {
|
|
||||||
let layer = rng.gen::<f32>();
|
|
||||||
LayerId((-layer.ln() * ml).floor() as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn descend(&self) -> impl Iterator<Item = LayerId> {
|
pub(crate) fn descend(&self) -> impl Iterator<Item = LayerId> {
|
||||||
DescendingLayerIter { next: Some(self.0) }
|
DescendingLayerIter { next: Some(self.0) }
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue