Skip to content

Multi-Threading for Sample_z #463

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ thiserror = "2.0"
lazy_static = "1.4"
probability = "0.20.3"
derive_more = { version = "2.0.1", features = ["display"] }
rayon = "1.10.0"

[profile.bench]
debug = true
Expand Down
38 changes: 37 additions & 1 deletion benches/sample_z.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,39 @@ pub fn bench_sample_z_narrow(c: &mut Criterion) {
c.bench_function("SampleZ narrow 10,000", |b| b.iter(sample_z_narrow));
}

/// Benchmarks `MatZ::sample_discrete_gauss` for a matrix of size 1x9 sampled by a
/// comparatively narrow discrete Gaussian distribution (`n = 1000`, center `0`, `s = 100`).
pub fn bench_sample_z_1by9_matrix(c: &mut Criterion) {
    let (n, center, s) = (Z::from(1000), Q::from(0), Q::from(100));

    // The routine only borrows the parameters, so it can be handed to the bencher by reference.
    let sample_matrix = || MatZ::sample_discrete_gauss(1, 9, &n, &center, &s).unwrap();

    c.bench_function("SampleZ 1x9 matrix", |bencher| bencher.iter(&sample_matrix));
}

/// Benchmarks `MatZ::sample_discrete_gauss` for a matrix of size 1x10 sampled by a
/// comparatively narrow discrete Gaussian distribution (`n = 1000`, center `0`, `s = 100`).
pub fn bench_sample_z_1by10_matrix(c: &mut Criterion) {
    let (n, center, s) = (Z::from(1000), Q::from(0), Q::from(100));

    // The routine only borrows the parameters, so it can be handed to the bencher by reference.
    let sample_matrix = || MatZ::sample_discrete_gauss(1, 10, &n, &center, &s).unwrap();

    c.bench_function("SampleZ 1x10 matrix", |bencher| bencher.iter(&sample_matrix));
}

/// Benchmarks `MatZ::sample_discrete_gauss` for a matrix of size 1x1 sampled by a
/// comparatively narrow discrete Gaussian distribution (`n = 1000`, center `0`, `s = 100`).
pub fn bench_sample_z_1by1_matrix(c: &mut Criterion) {
    let (n, center, s) = (Z::from(1000), Q::from(0), Q::from(100));

    // The routine only borrows the parameters, so it can be handed to the bencher by reference.
    let sample_matrix = || MatZ::sample_discrete_gauss(1, 1, &n, &center, &s).unwrap();

    c.bench_function("SampleZ 1x1 matrix", |bencher| bencher.iter(&sample_matrix));
}

/// benchmark creating a single integer sampled by a comparatively wide discrete Gaussian distribution.
pub fn bench_sample_z_wide_single(c: &mut Criterion) {
/// Create a single integer sampled by a comparatively wide discrete Gaussian distribution.
Expand Down Expand Up @@ -75,5 +108,8 @@ criterion_group!(
bench_sample_z_wide,
bench_sample_z_narrow,
bench_sample_z_wide_single,
bench_sample_z_narrow_single
bench_sample_z_narrow_single,
bench_sample_z_1by1_matrix,
bench_sample_z_1by9_matrix,
bench_sample_z_1by10_matrix
);
5 changes: 3 additions & 2 deletions src/integer/mat_z/sample/discrete_gauss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,11 @@ impl MatZ {

let mut dgis = DiscreteGaussianIntegerSampler::init(&n, &center, &s)?;

let mut entries = dgis.sample_z_multiple(out.get_num_columns() * out.get_num_rows());

for row in 0..out.get_num_rows() {
for col in 0..out.get_num_columns() {
let sample = dgis.sample_z();
out.set_entry(row, col, sample)?;
out.set_entry(row, col, entries.pop().unwrap()).unwrap();
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/integer/poly_over_z/sample/discrete_gauss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ impl PolyOverZ {

let mut dgis = DiscreteGaussianIntegerSampler::init(&n, &center, &s)?;

let mut entries = dgis.sample_z_multiple(max_degree + 1);
for index in 0..=max_degree {
let sample = dgis.sample_z();
poly.set_coeff(index, &sample)?;
poly.set_coeff(index, entries.pop().unwrap())?;
}
Ok(poly)
}
Expand Down
5 changes: 3 additions & 2 deletions src/integer_mod_q/mat_zq/sample/discrete_gauss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,11 @@ impl MatZq {

let mut dgis = DiscreteGaussianIntegerSampler::init(&n, &center, &s)?;

let mut entries = dgis.sample_z_multiple(out.get_num_columns() * out.get_num_rows());

for row in 0..out.get_num_rows() {
for col in 0..out.get_num_columns() {
let sample = dgis.sample_z();
out.set_entry(row, col, sample).unwrap();
out.set_entry(row, col, entries.pop().unwrap()).unwrap();
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/integer_mod_q/poly_over_zq/sample/discrete_gauss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ impl PolyOverZq {

let mut dgis = DiscreteGaussianIntegerSampler::init(&n, &center, &s)?;

let mut entries = dgis.sample_z_multiple(max_degree + 1);
for index in 0..=max_degree {
let sample = dgis.sample_z();
poly.set_coeff(index, &sample)?;
poly.set_coeff(index, entries.pop().unwrap())?;
}
Ok(poly)
}
Expand Down
107 changes: 107 additions & 0 deletions src/utils/sample/discrete_gauss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ use crate::{
traits::{GetNumColumns, GetNumRows, Pow},
};
use rand::RngCore;
use rayon::{
current_num_threads,
iter::{IntoParallelIterator, ParallelIterator},
};
use serde::Serialize;
use std::collections::HashMap;

Expand Down Expand Up @@ -172,6 +176,65 @@ impl DiscreteGaussianIntegerSampler {
}
}
}

/// Chooses `nr_samples` samples according to the discrete Gaussian distribution out of
/// `[lower_bound , lower_bound + interval_size ]`.
///
/// This function implements a multi-threaded version of [`DiscreteGaussianIntegerSampler::sample_z`]
/// that simply samples `nr_samples` many entries.
/// It first considers the number of available threads.
/// For each thread, a single sampler will be cloned from the origin (to ensure memory safety),
/// and if there is only one available thread, then we will not clone the sampler, ensuring that the actual
/// sampler will be updated with new hash values.
///
/// Parameters:
/// - `nr_samples`: the number of `sample_z` samples that should be computed.
///
/// # Examples
/// ```
/// use qfall_math::{integer::Z, rational::Q};
/// use qfall_math::utils::sample::discrete_gauss::DiscreteGaussianIntegerSampler;
/// let n = Z::from(1024);
/// let center = Q::ZERO;
/// let gaussian_parameter = Q::ONE;
///
/// let mut dgis = DiscreteGaussianIntegerSampler::init(&n, &center, &gaussian_parameter).unwrap();
///
/// let samples_5 = dgis.sample_z_multiple(5);
/// assert_eq!(samples_5.len(), 5)
/// ```
///
/// # Panics ...
/// - if `nr_samples` is negative
pub fn sample_z_multiple(&mut self, nr_samples: i64) -> Vec<Z> {
let nr_threads = current_num_threads();
let nr_samples = nr_samples as usize;
if nr_threads == 1 || nr_samples < 10 {
// no multithreading
(0..nr_samples).map(|_| self.sample_z()).collect()
} else {
// with multithreading
let entries_per_thread = nr_samples / nr_threads;
let remainder = nr_samples % nr_threads;
(0..nr_threads)
.into_par_iter()
.map(|thread_i| {
Comment on lines +219 to +221
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to provide another idea to make things quicker (although this might not be rayon's way of parallelising things).
Currently, you split the workload into similar-sized buckets - implicitely making the assumption that each bucket will roughly take the same duration on each thread. This assumption should be correct in this case for larger bucket sizes, but for smaller sizes, it might be quicker to just submit tasks to a pool of threads, where each thread collects a new task once it has finished the current one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked again, but I was not able to find a better solution for it, and given that the function will probably not be called with small numbers of samples - I think the current implementation is reasonable.
The problem with the dynamic approach is that I was not able to find a good way to also distribute the integer sampler, and additionally, this also provides an overhead with more threadmanagement, which might also increase the runtime due to the dynamic distribution of tasks.

let mut dgis_thread = self.clone();
let entries_thread_i = if thread_i < remainder {
entries_per_thread + 1
} else {
entries_per_thread
};
(0..entries_thread_i)
.map(|_| dgis_thread.sample_z())
.collect()
})
.reduce(Vec::new, |mut a, mut b| {
a.append(&mut b);
a
})
Comment on lines +232 to +235
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't looked at the flamegraph, but considering my current experience with our library, setting the values of the matrix has a significant overhead. Furthermore, joining and iterating vectors shouldn't be the fastest thing in the world.
Could it be possible that it would be quicker to set the entry in the matrix directly after sampling it - sharing the matrix in an Arc?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will check later, but the join/reduce is probably efficient, as it moves the values.
I would assume that the additional management cost of Arc will exceed the runtime of the collect.

}
}
}

/// Computes the value of the Gaussian function for `x`.
Expand Down Expand Up @@ -800,3 +863,47 @@ mod test_sample_d {
let _ = sample_d_precomputed_gso(&basis, &false_gso, &n, &center, &Q::from(5)).unwrap();
}
}

#[cfg(test)]
mod test_sample_z_multiple {
    use crate::{
        integer::Z, rational::Q, utils::sample::discrete_gauss::DiscreteGaussianIntegerSampler,
    };

    /// Creates the sampler used by all tests of this module:
    /// `n = 1024`, center `0` and Gaussian parameter `1`.
    fn new_sampler() -> DiscreteGaussianIntegerSampler {
        DiscreteGaussianIntegerSampler::init(&Z::from(1024), &Q::ZERO, &Q::ONE).unwrap()
    }

    /// Ensure that the function outputs the correct number of samples
    #[test]
    fn correct_number_of_samples() {
        let mut dgis = new_sampler();

        // Covers the empty case, small requests and large requests.
        for nr_samples in [0_i64, 1, 10, 110, 12410] {
            let samples = dgis.sample_z_multiple(nr_samples);

            assert_eq!(nr_samples as usize, samples.len());
        }
    }

    /// Ensure that the function does not allow for negative number of samples
    #[test]
    #[should_panic]
    fn panic_if_negative_nr_samples() {
        let mut dgis = new_sampler();

        let _ = dgis.sample_z_multiple(-1);
    }
}