@@ -333,6 +333,12 @@ pub trait Hasher {
333
333
///
334
334
/// println!("Hash is {:x}!", hasher.finish());
335
335
/// ```
336
+ ///
337
+ /// # Note to Implementers
338
+ ///
339
+ /// You generally should not do length-prefixing as part of implementing
340
+ /// this method. It's up to the [`Hash`] implementation to call
341
+ /// [`Hasher::write_length_prefix`] before sequences that need it.
336
342
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
337
343
fn write ( & mut self , bytes : & [ u8 ] ) ;
338
344
@@ -409,6 +415,96 @@ pub trait Hasher {
409
415
fn write_isize ( & mut self , i : isize ) {
410
416
self . write_usize ( i as usize )
411
417
}
418
+
419
+ /// Writes a length prefix into this hasher, as part of being prefix-free.
420
+ ///
421
+ /// If you're implementing [`Hash`] for a custom collection, call this before
422
+ /// writing its contents to this `Hasher`. That way
423
+ /// `(collection![1, 2, 3], collection![4, 5])` and
424
+ /// `(collection![1, 2], collection![3, 4, 5])` will provide different
425
+ /// sequences of values to the `Hasher`.
426
+ ///
427
+ /// The `impl<T> Hash for [T]` includes a call to this method, so if you're
428
+ /// hashing a slice (or array or vector) via its `Hash::hash` method,
429
+ /// you should **not** call this yourself.
430
+ ///
431
+ /// This method is only for providing domain separation. If you want to
432
+ /// hash a `usize` that represents part of the *data*, then it's important
433
+ /// that you pass it to [`Hasher::write_usize`] instead of to this method.
434
+ ///
435
+ /// # Examples
436
+ ///
437
+ /// ```
438
+ /// #![feature(hasher_prefixfree_extras)]
439
+ /// # // Stubs to make the `impl` below pass the compiler
440
+ /// # struct MyCollection<T>(Option<T>);
441
+ /// # impl<T> MyCollection<T> {
442
+ /// # fn len(&self) -> usize { todo!() }
443
+ /// # }
444
+ /// # impl<'a, T> IntoIterator for &'a MyCollection<T> {
445
+ /// # type Item = T;
446
+ /// # type IntoIter = std::iter::Empty<T>;
447
+ /// # fn into_iter(self) -> Self::IntoIter { todo!() }
448
+ /// # }
449
+ ///
450
+ /// use std::hash::{Hash, Hasher};
451
+ /// impl<T: Hash> Hash for MyCollection<T> {
452
+ /// fn hash<H: Hasher>(&self, state: &mut H) {
453
+ /// state.write_length_prefix(self.len());
454
+ /// for elt in self {
455
+ /// elt.hash(state);
456
+ /// }
457
+ /// }
458
+ /// }
459
+ /// ```
460
+ ///
461
+ /// # Note to Implementers
462
+ ///
463
+ /// If you've decided that your `Hasher` is willing to be susceptible to
464
+ /// Hash-DoS attacks, then in the name of increased performance you might
465
+ /// consider skipping hashing some or all of the `len` provided.
466
+ #[ inline]
467
+ #[ unstable( feature = "hasher_prefixfree_extras" , issue = "96762" ) ]
468
+ fn write_length_prefix ( & mut self , len : usize ) {
469
+ self . write_usize ( len) ;
470
+ }
471
+
472
+ /// Writes a single `str` into this hasher.
473
+ ///
474
+ /// If you're implementing [`Hash`], you generally do not need to call this,
475
+ /// as `impl Hash for str` already calls it, so you should prefer that instead.
476
+ ///
477
+ /// This includes the domain separator for prefix-freedom, so you should
478
+ /// **not** call [`Self::write_length_prefix`] before calling this.
479
+ ///
480
+ /// # Note to Implementers
481
+ ///
482
+ /// The default implementation of this method includes a call to
483
+ /// [`Self::write_length_prefix`], so if your implementation of `Hasher`
484
+ /// doesn't care about prefix-freedom and you've thus overridden
485
+ /// that method to do nothing, there's no need to override this one.
486
+ ///
487
+ /// This method is available to be overridden separately from the others
488
+ /// as `str` being UTF-8 means that it never contains `0xFF` bytes, which
489
+ /// can be used to provide prefix-freedom cheaper than hashing a length.
490
+ ///
491
+ /// For example, if your `Hasher` works byte-by-byte (perhaps by accumulating
492
+ /// them into a buffer), then you can hash the bytes of the `str` followed
493
+ /// by a single `0xFF` byte.
494
+ ///
495
+ /// If your `Hasher` works in chunks, you can also do this by being careful
496
+ /// about how you pad partial chunks. If the chunks are padded with `0x00`
497
+ /// bytes then just hashing an extra `0xFF` byte doesn't necessarily
498
+ /// provide prefix-freedom, as `"ab"` and `"ab\u{0}"` would likely hash
499
+ /// the same sequence of chunks. But if you pad with `0xFF` bytes instead,
500
+ /// ensuring at least one padding byte, then it can often provide
501
+ /// prefix-freedom cheaper than hashing the length would.
502
+ #[ inline]
503
+ #[ unstable( feature = "hasher_prefixfree_extras" , issue = "96762" ) ]
504
+ fn write_str ( & mut self , s : & str ) {
505
+ self . write_length_prefix ( s. len ( ) ) ;
506
+ self . write ( s. as_bytes ( ) ) ;
507
+ }
412
508
}
413
509
414
510
#[ stable( feature = "indirect_hasher_impl" , since = "1.22.0" ) ]
@@ -455,6 +551,12 @@ impl<H: Hasher + ?Sized> Hasher for &mut H {
455
551
fn write_isize ( & mut self , i : isize ) {
456
552
( * * self ) . write_isize ( i)
457
553
}
554
+ fn write_length_prefix ( & mut self , len : usize ) {
555
+ ( * * self ) . write_length_prefix ( len)
556
+ }
557
+ fn write_str ( & mut self , s : & str ) {
558
+ ( * * self ) . write_str ( s)
559
+ }
458
560
}
459
561
460
562
/// A trait for creating instances of [`Hasher`].
@@ -709,8 +811,7 @@ mod impls {
709
811
impl Hash for str {
710
812
#[ inline]
711
813
fn hash < H : Hasher > ( & self , state : & mut H ) {
712
- state. write ( self . as_bytes ( ) ) ;
713
- state. write_u8 ( 0xff )
814
+ state. write_str ( self ) ;
714
815
}
715
816
}
716
817
@@ -767,7 +868,7 @@ mod impls {
767
868
impl < T : Hash > Hash for [ T ] {
768
869
#[ inline]
769
870
fn hash < H : Hasher > ( & self , state : & mut H ) {
770
- self . len ( ) . hash ( state ) ;
871
+ state . write_length_prefix ( self . len ( ) ) ;
771
872
Hash :: hash_slice ( self , state)
772
873
}
773
874
}
0 commit comments