Skip to content

Commit 20b24f2

Browse files
committed
Merge branch 'blockrng_take5'
2 parents 1caee2a + 08924a3 commit 20b24f2

File tree

9 files changed

+582
-433
lines changed

9 files changed

+582
-433
lines changed

benches/generators.rs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ use test::{black_box, Bencher};
1212
use rand::{RngCore, Rng, SeedableRng, NewRng, StdRng, OsRng, JitterRng, EntropyRng};
1313
use rand::{XorShiftRng, Hc128Rng, IsaacRng, Isaac64Rng, ChaChaRng};
1414
use rand::reseeding::ReseedingRng;
15+
use rand::prng::hc128::Hc128Core;
16+
use rand::thread_rng;
1517

1618
macro_rules! gen_bytes {
1719
($fnn:ident, $gen:expr) => {
@@ -150,10 +152,13 @@ chacha_rounds!(gen_bytes_chacha12, gen_u32_chacha12, gen_u64_chacha12, 12);
150152
chacha_rounds!(gen_bytes_chacha20, gen_u32_chacha20, gen_u64_chacha20, 20);
151153

152154

155+
const RESEEDING_THRESHOLD: u64 = 1024*1024*1024; // something high enough to get
156+
// deterministic measurements
157+
153158
#[bench]
154159
fn reseeding_hc128_bytes(b: &mut Bencher) {
155-
let mut rng = ReseedingRng::new(Hc128Rng::new(),
156-
128*1024*1024,
160+
let mut rng = ReseedingRng::new(Hc128Core::new(),
161+
RESEEDING_THRESHOLD,
157162
EntropyRng::new());
158163
let mut buf = [0u8; BYTES_LEN];
159164
b.iter(|| {
@@ -169,8 +174,8 @@ macro_rules! reseeding_uint {
169174
($fnn:ident, $ty:ty) => {
170175
#[bench]
171176
fn $fnn(b: &mut Bencher) {
172-
let mut rng = ReseedingRng::new(Hc128Rng::new(),
173-
128*1024*1024,
177+
let mut rng = ReseedingRng::new(Hc128Core::new(),
178+
RESEEDING_THRESHOLD,
174179
EntropyRng::new());
175180
b.iter(|| {
176181
for _ in 0..RAND_BENCH_N {
@@ -184,3 +189,22 @@ macro_rules! reseeding_uint {
184189

185190
reseeding_uint!(reseeding_hc128_u32, u32);
186191
reseeding_uint!(reseeding_hc128_u64, u64);
192+
193+
194+
// Benchmark generating `$ty` values through `thread_rng()`.
// $fnn: name of the generated benchmark function; $ty: integer type to generate.
macro_rules! threadrng_uint {
    ($fnn:ident, $ty:ty) => {
        #[bench]
        fn $fnn(b: &mut Bencher) {
            let mut rng = thread_rng();
            b.iter(|| {
                for _ in 0..RAND_BENCH_N {
                    // black_box keeps the optimizer from discarding the work
                    black_box(rng.gen::<$ty>());
                }
            });
            // report throughput as bytes generated per iteration
            b.bytes = size_of::<$ty>() as u64 * RAND_BENCH_N;
        }
    }
}

threadrng_uint!(thread_rng_u32, u32);
threadrng_uint!(thread_rng_u64, u64);

rand-core/src/impls.rs

Lines changed: 170 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
2222
use core::intrinsics::transmute;
2323
use core::ptr::copy_nonoverlapping;
24-
use core::slice;
24+
use core::{fmt, slice};
2525
use core::cmp::min;
2626
use core::mem::size_of;
27-
use RngCore;
27+
use {RngCore, BlockRngCore, CryptoRng, SeedableRng, Error};
2828

2929
/// Implement `next_u64` via `next_u32`, little-endian order.
3030
pub fn next_u64_via_u32<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
@@ -164,4 +164,172 @@ pub fn next_u64_via_fill<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
164164
impl_uint_from_fill!(rng, u64, 8)
165165
}
166166

167+
/// Wrapper around PRNGs that implement [`BlockRngCore`] to keep a results
168+
/// buffer and offer the methods from [`RngCore`].
169+
///
170+
/// `BlockRng` has optimized methods to read from the output array that the
171+
/// algorithm of many cryptograpic RNGs generates natively. Also they handle the
172+
/// bookkeeping when to generate a new batch of values.
173+
///
174+
/// `next_u32` simply indexes the array. `next_u64` tries to read two `u32`
175+
/// values at a time if possible, and handles edge cases like when only one
176+
/// value is left. `try_fill_bytes` is optimized use the [`BlockRngCore`]
177+
/// implementation to write the results directly to the destination slice.
178+
/// No generated values are ever thown away.
179+
///
180+
/// For easy initialization `BlockRng` also implements [`SeedableRng`].
181+
///
182+
/// [`BlockRngCore`]: ../BlockRngCore.t.html
183+
/// [`RngCore`]: ../RngCore.t.html
184+
/// [`SeedableRng`]: ../SeedableRng.t.html
185+
#[derive(Clone)]
186+
pub struct BlockRng<R: BlockRngCore<u32>> {
187+
pub core: R,
188+
pub results: R::Results,
189+
pub index: usize,
190+
}
191+
192+
// Custom Debug implementation that does not expose the contents of `results`.
193+
impl<R: BlockRngCore<u32>+fmt::Debug> fmt::Debug for BlockRng<R> {
194+
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
195+
fmt.debug_struct("BlockRng")
196+
.field("core", &self.core)
197+
.field("result_len", &self.results.as_ref().len())
198+
.field("index", &self.index)
199+
.finish()
200+
}
201+
}
202+
203+
impl<R: BlockRngCore<u32>> RngCore for BlockRng<R> {
204+
#[inline(always)]
205+
fn next_u32(&mut self) -> u32 {
206+
if self.index >= self.results.as_ref().len() {
207+
self.core.generate(&mut self.results);
208+
self.index = 0;
209+
}
210+
211+
let value = self.results.as_ref()[self.index];
212+
self.index += 1;
213+
value
214+
}
215+
216+
#[inline(always)]
217+
fn next_u64(&mut self) -> u64 {
218+
let read_u64 = |results: &[u32], index| {
219+
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
220+
// requires little-endian CPU supporting unaligned reads:
221+
unsafe { *(&results[index] as *const u32 as *const u64) }
222+
} else {
223+
let x = results[index] as u64;
224+
let y = results[index + 1] as u64;
225+
(y << 32) | x
226+
}
227+
};
228+
229+
let len = self.results.as_ref().len();
230+
231+
let index = self.index;
232+
if index < len-1 {
233+
self.index += 2;
234+
// Read an u64 from the current index
235+
read_u64(self.results.as_ref(), index)
236+
} else if index >= len {
237+
self.core.generate(&mut self.results);
238+
self.index = 2;
239+
read_u64(self.results.as_ref(), 0)
240+
} else {
241+
let x = self.results.as_ref()[len-1] as u64;
242+
self.core.generate(&mut self.results);
243+
self.index = 1;
244+
let y = self.results.as_ref()[0] as u64;
245+
(y << 32) | x
246+
}
247+
}
248+
249+
// As an optimization we try to write directly into the output buffer.
250+
// This is only enabled for little-endian platforms where unaligned writes
251+
// are known to be safe and fast.
252+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
253+
fn fill_bytes(&mut self, dest: &mut [u8]) {
254+
let mut filled = 0;
255+
256+
// Continue filling from the current set of results
257+
if self.index < self.results.as_ref().len() {
258+
let (consumed_u32, filled_u8) =
259+
fill_via_u32_chunks(&self.results.as_ref()[self.index..],
260+
dest);
261+
262+
self.index += consumed_u32;
263+
filled += filled_u8;
264+
}
265+
266+
let len_remainder =
267+
(dest.len() - filled) % (self.results.as_ref().len() * 4);
268+
let end_direct = dest.len() - len_remainder;
269+
270+
while filled < end_direct {
271+
let dest_u32: &mut R::Results = unsafe {
272+
::core::mem::transmute(dest[filled..].as_mut_ptr())
273+
};
274+
self.core.generate(dest_u32);
275+
filled += self.results.as_ref().len() * 4;
276+
}
277+
self.index = self.results.as_ref().len();
278+
279+
if len_remainder > 0 {
280+
self.core.generate(&mut self.results);
281+
let (consumed_u32, _) =
282+
fill_via_u32_chunks(&mut self.results.as_ref(),
283+
&mut dest[filled..]);
284+
285+
self.index = consumed_u32;
286+
}
287+
}
288+
289+
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
290+
fn fill_bytes(&mut self, dest: &mut [u8]) {
291+
let mut read_len = 0;
292+
while read_len < dest.len() {
293+
if self.index >= self.results.as_ref().len() {
294+
self.core.generate(&mut self.results);
295+
self.index = 0;
296+
}
297+
let (consumed_u32, filled_u8) =
298+
fill_via_u32_chunks(&self.results.as_ref()[self.index..],
299+
&mut dest[read_len..]);
300+
301+
self.index += consumed_u32;
302+
read_len += filled_u8;
303+
}
304+
}
305+
306+
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
307+
Ok(self.fill_bytes(dest))
308+
}
309+
}
310+
311+
impl<R: BlockRngCore<u32> + SeedableRng> SeedableRng for BlockRng<R> {
312+
type Seed = R::Seed;
313+
314+
fn from_seed(seed: Self::Seed) -> Self {
315+
let results_empty = R::Results::default();
316+
Self {
317+
core: R::from_seed(seed),
318+
index: results_empty.as_ref().len(), // generate on first use
319+
results: results_empty,
320+
}
321+
}
322+
323+
fn from_rng<RNG: RngCore>(rng: &mut RNG) -> Result<Self, Error> {
324+
let results_empty = R::Results::default();
325+
Ok(Self {
326+
core: R::from_rng(rng)?,
327+
index: results_empty.as_ref().len(), // generate on first use
328+
results: results_empty,
329+
})
330+
}
331+
}
332+
333+
impl<R: BlockRngCore<u32>+CryptoRng> CryptoRng for BlockRng<R> {}
334+
167335
// TODO: implement tests for the above

rand-core/src/lib.rs

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,18 @@ pub trait RngCore {
162162
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error>;
163163
}
164164

165+
/// Trait for RNGs that do not generate random numbers one at a time, but in
166+
/// blocks. Especially for cryptographic RNG's it is common to generate 16 or
167+
/// more results at a time.
168+
/// Trait for RNGs that do not generate random numbers one at a time, but in
/// blocks. Especially for cryptographic RNGs it is common to generate 16 or
/// more results at a time.
pub trait BlockRngCore<T>: Sized {
    /// Results type. This is the 'block' an RNG implementing `BlockRngCore`
    /// generates, which will usually be an array like `[u32; 16]`.
    type Results: AsRef<[T]> + Default;

    /// Generate a new block of results, overwriting `results` in place.
    fn generate(&mut self, results: &mut Self::Results);
}
176+
165177
/// A marker trait for an `Rng` which may be considered for use in
166178
/// cryptography.
167179
///
@@ -182,7 +194,7 @@ pub trait RngCore {
182194
///
183195
/// Note also that use of a `CryptoRng` does not protect against other
184196
/// weaknesses such as seeding from a weak entropy source or leaking state.
185-
pub trait CryptoRng: RngCore {}
197+
pub trait CryptoRng {}
186198

187199
/// A random number generator that can be explicitly seeded.
188200
///
@@ -263,45 +275,41 @@ pub trait SeedableRng: Sized {
263275

264276

265277
impl<'a, R: RngCore + ?Sized> RngCore for &'a mut R {
266-
#[inline]
278+
#[inline(always)]
267279
fn next_u32(&mut self) -> u32 {
268280
(**self).next_u32()
269281
}
270282

271-
#[inline]
283+
#[inline(always)]
272284
fn next_u64(&mut self) -> u64 {
273285
(**self).next_u64()
274286
}
275287

276-
#[inline]
277288
fn fill_bytes(&mut self, dest: &mut [u8]) {
278289
(**self).fill_bytes(dest)
279290
}
280-
281-
#[inline]
291+
282292
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
283293
(**self).try_fill_bytes(dest)
284294
}
285295
}
286296

287297
#[cfg(any(feature="std", feature="alloc"))]
288298
impl<R: RngCore + ?Sized> RngCore for Box<R> {
289-
#[inline]
299+
#[inline(always)]
290300
fn next_u32(&mut self) -> u32 {
291301
(**self).next_u32()
292302
}
293303

294-
#[inline]
304+
#[inline(always)]
295305
fn next_u64(&mut self) -> u64 {
296306
(**self).next_u64()
297307
}
298308

299-
#[inline]
300309
fn fill_bytes(&mut self, dest: &mut [u8]) {
301310
(**self).fill_bytes(dest)
302311
}
303-
304-
#[inline]
312+
305313
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
306314
(**self).try_fill_bytes(dest)
307315
}

src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ pub use jitter::JitterRng;
278278
#[cfg(feature="std")] pub use os::OsRng;
279279

280280
// pseudo rngs
281+
pub mod prng;
281282
pub use isaac::{IsaacRng, Isaac64Rng};
282283
pub use chacha::ChaChaRng;
283284
pub use prng::XorShiftRng;
@@ -312,7 +313,6 @@ pub mod isaac {
312313

313314
// private modules
314315
#[cfg(feature="std")] mod entropy_rng;
315-
mod prng;
316316
#[cfg(feature="std")] mod thread_rng;
317317

318318

@@ -800,10 +800,12 @@ impl<R: SeedableRng> NewRng for R {
800800
pub struct StdRng(Hc128Rng);
801801

802802
impl RngCore for StdRng {
803+
#[inline(always)]
803804
fn next_u32(&mut self) -> u32 {
804805
self.0.next_u32()
805806
}
806807

808+
#[inline(always)]
807809
fn next_u64(&mut self) -> u64 {
808810
self.0.next_u64()
809811
}

0 commit comments

Comments
 (0)