diff --git a/CHANGELOG.md b/CHANGELOG.md index b80b9bcc1e8..bb0ae410c59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. - Fix feature `simd_support` for recent nightly rust (#1586) - Add `Alphabetic` distribution. (#1587) - Re-export `rand_core` (#1602) +- Allow `fn rand::seq::index::sample_weighted` and `fn IndexedRandom::choose_multiple_weighted` to return fewer than `amount` results (#1623), reverting an undocumented change (#1382) to the previous release. ## [0.9.0] - 2025-01-27 ### Security and unsafe diff --git a/src/seq/index.rs b/src/seq/index.rs index 852bdac76c4..7dd0513850c 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -282,10 +282,11 @@ where } } -/// Randomly sample exactly `amount` distinct indices from `0..length` +/// Randomly sample `amount` distinct indices from `0..length` /// -/// Results are in arbitrary order (there is no guarantee of shuffling or -/// ordering). +/// The result may contain fewer than `amount` indices if insufficient non-zero +/// weights are available. Results are returned in an arbitrary order (there is +/// no guarantee of shuffling or ordering). /// /// Function `weight` is called once for each index to provide weights. /// @@ -295,7 +296,6 @@ where /// /// Error cases: /// - [`WeightError::InvalidWeight`] when a weight is not-a-number or negative. -/// - [`WeightError::InsufficientNonZero`] when fewer than `amount` weights are positive. /// /// This implementation uses `O(length + amount)` space and `O(length)` time. #[cfg(feature = "std")] @@ -328,10 +328,13 @@ where } } -/// Randomly sample exactly `amount` distinct indices from `0..length`, and -/// return them in an arbitrary order (there is no guarantee of shuffling or -/// ordering). The weights are to be provided by the input function `weights`, -/// which will be called once for each index. 
+/// Randomly sample `amount` distinct indices from `0..length` +/// +/// The result may contain fewer than `amount` indices if insufficient non-zero +/// weights are available. Results are returned in an arbitrary order (there is +/// no guarantee of shuffling or ordering). +/// +/// Function `weight` is called once for each index to provide weights. /// /// This implementation is based on the algorithm A-ExpJ as found in /// [Efraimidis and Spirakis, 2005](https://doi.org/10.1016/j.ipl.2005.11.003). @@ -339,7 +342,6 @@ where /// /// Error cases: /// - [`WeightError::InvalidWeight`] when a weight is not-a-number or negative. -/// - [`WeightError::InsufficientNonZero`] when fewer than `amount` weights are positive. #[cfg(feature = "std")] fn sample_efraimidis_spirakis( rng: &mut R, @@ -403,28 +405,26 @@ where index += N::one(); } - if candidates.len() < amount.as_usize() { - return Err(WeightError::InsufficientNonZero); - } + if index < length { + let mut x = rng.random::().ln() / candidates.peek().unwrap().key; + while index < length { + let weight = weight(index.as_usize()).into(); + if weight > 0.0 { + x -= weight; + if x <= 0.0 { + let min_candidate = candidates.pop().unwrap(); + let t = (min_candidate.key * weight).exp(); + let key = rng.random_range(t..1.0).ln() / weight; + candidates.push(Element { index, key }); - let mut x = rng.random::().ln() / candidates.peek().unwrap().key; - while index < length { - let weight = weight(index.as_usize()).into(); - if weight > 0.0 { - x -= weight; - if x <= 0.0 { - let min_candidate = candidates.pop().unwrap(); - let t = (min_candidate.key * weight).exp(); - let key = rng.random_range(t..1.0).ln() / weight; - candidates.push(Element { index, key }); - - x = rng.random::().ln() / candidates.peek().unwrap().key; + x = rng.random::().ln() / candidates.peek().unwrap().key; + } + } else if !(weight >= 0.0) { + return Err(WeightError::InvalidWeight); } - } else if !(weight >= 0.0) { - return Err(WeightError::InvalidWeight); - 
} - index += N::one(); + index += N::one(); + } } Ok(IndexVec::from( @@ -653,7 +653,7 @@ mod test { } let r = sample_weighted(&mut seed_rng(423), 10, |i| i as f64, 10); - assert_eq!(r.unwrap_err(), WeightError::InsufficientNonZero); + assert_eq!(r.unwrap().len(), 9); } #[test] diff --git a/src/seq/iterator.rs b/src/seq/iterator.rs index b10d205676a..a9a9e56155c 100644 --- a/src/seq/iterator.rs +++ b/src/seq/iterator.rs @@ -134,6 +134,10 @@ pub trait IteratorRandom: Iterator + Sized { /// force every element to be created regardless call `.inspect(|e| ())`. /// /// [`choose`]: IteratorRandom::choose + // + // Clippy is wrong here: we need to iterate over all entries with the RNG to + // ensure that choosing is *stable*. + #[allow(clippy::double_ended_iterator_last)] fn choose_stable(mut self, rng: &mut R) -> Option where R: Rng + ?Sized, diff --git a/src/seq/slice.rs b/src/seq/slice.rs index d48d9d2e9f3..f909418bc48 100644 --- a/src/seq/slice.rs +++ b/src/seq/slice.rs @@ -173,26 +173,18 @@ pub trait IndexedRandom: Index { /// Biased sampling of `amount` distinct elements /// - /// Similar to [`choose_multiple`], but where the likelihood of each element's - /// inclusion in the output may be specified. The elements are returned in an - /// arbitrary, unspecified order. + /// Similar to [`choose_multiple`], but where the likelihood of each + /// element's inclusion in the output may be specified. Zero-weighted + /// elements are never returned; the result may therefore contain fewer + /// elements than `amount` even when `self.len() >= amount`. The elements + /// are returned in an arbitrary, unspecified order. /// /// The specified function `weight` maps each item `x` to a relative /// likelihood `weight(x)`. The probability of each item being selected is /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. /// - /// If all of the weights are equal, even if they are all zero, each element has - /// an equal likelihood of being selected. 
- /// - /// This implementation uses `O(length + amount)` space and `O(length)` time - /// if the "nightly" feature is enabled, or `O(length)` space and - /// `O(length + amount * log length)` time otherwise. - /// - /// # Known issues - /// - /// The algorithm currently used to implement this method loses accuracy - /// when small values are used for weights. - /// See [#1476](https://github.com/rust-random/rand/issues/1476). + /// This implementation uses `O(length + amount)` space and `O(length)` time. + /// See [`index::sample_weighted`] for details. /// /// # Example /// @@ -687,7 +679,7 @@ mod test { // Case 2: All of the weights are 0 let choices = [('a', 0), ('b', 0), ('c', 0)]; let r = choices.choose_multiple_weighted(&mut rng, 2, |item| item.1); - assert_eq!(r.unwrap_err(), WeightError::InsufficientNonZero); + assert_eq!(r.unwrap().len(), 0); // Case 3: Negative weights let choices = [('a', -1), ('b', 1), ('c', 1)];