foldhash/seed.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
use core::hash::BuildHasher;
// These constants may end up unused depending on platform support.
#[allow(unused)]
use crate::{ARBITRARY1, ARBITRARY9};
use super::{
folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7,
ARBITRARY8,
};
/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
const FIXED_GLOBAL_SEED: [u64; 4] = [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7];
pub mod fast {
use super::*;
use crate::fast::FoldHasher;
/// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized.
#[derive(Copy, Clone, Debug)]
pub struct RandomState {
per_hasher_seed: u64,
global_seed: global::GlobalSeed,
}
impl Default for RandomState {
fn default() -> Self {
// We initialize the per-hasher seed with the stack pointer to ensure
// different threads have different seeds, with as side benefit that
// stack address randomization gives us further non-determinism.
let mut per_hasher_seed = 0;
let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
per_hasher_seed = stack_ptr;
// If we have the standard library available we use a thread-local
// state to ensure RandomStates are different with high probability,
// even if the call stack is the same.
#[cfg(feature = "std")]
{
use std::cell::Cell;
thread_local! {
static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
}
let nondeterminism = PER_HASHER_NONDETERMINISM.get();
per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
PER_HASHER_NONDETERMINISM.set(per_hasher_seed);
};
// If we don't have the standard library we instead use a global
// atomic instead of a thread-local state.
//
// PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
// but this doesn't matter in practice - it is impossible that two
// different threads have the same stack location, so they'll almost
// surely generate different seeds, and provide a different possible
// update for PER_HASHER_NONDETERMINISM. If we would use a proper
// fetch_add atomic update then there is a larger chance of
// problematic contention.
//
// We use usize instead of 64-bit atomics for best platform support.
#[cfg(not(feature = "std"))]
{
use core::sync::atomic::{AtomicUsize, Ordering};
static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);
let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
}
// One extra mixing step to ensure good random bits.
per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY2);
Self {
per_hasher_seed,
global_seed: global::GlobalSeed::new(),
}
}
}
impl BuildHasher for RandomState {
type Hasher = FoldHasher;
#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
}
}
/// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed.
///
/// Not recommended unless you absolutely need determinism.
#[derive(Copy, Clone, Debug)]
pub struct FixedState {
per_hasher_seed: u64,
}
impl FixedState {
/// Creates a [`FixedState`] with the given seed.
#[inline(always)]
pub const fn with_seed(seed: u64) -> Self {
// XOR with ARBITRARY3 such that with_seed(0) matches default.
Self {
per_hasher_seed: seed ^ ARBITRARY3,
}
}
}
impl Default for FixedState {
#[inline(always)]
fn default() -> Self {
Self {
per_hasher_seed: ARBITRARY3,
}
}
}
impl BuildHasher for FixedState {
type Hasher = FoldHasher;
#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
FoldHasher::with_seed(self.per_hasher_seed, &FIXED_GLOBAL_SEED)
}
}
}
pub mod quality {
use super::*;
use crate::quality::FoldHasher;
/// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized.
#[derive(Copy, Clone, Default, Debug)]
pub struct RandomState {
inner: fast::RandomState,
}
impl BuildHasher for RandomState {
type Hasher = FoldHasher;
#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
FoldHasher {
inner: self.inner.build_hasher(),
}
}
}
/// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed.
///
/// Not recommended unless you absolutely need determinism.
#[derive(Copy, Clone, Default, Debug)]
pub struct FixedState {
inner: fast::FixedState,
}
impl FixedState {
/// Creates a [`FixedState`] with the given seed.
#[inline(always)]
pub const fn with_seed(seed: u64) -> Self {
Self {
// We do an additional folded multiply with the seed here for
// the quality hash to ensure better independence between seed
// and hash. If the seed is zero the folded multiply is zero,
// preserving with_seed(0) == default().
inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)),
}
}
}
impl BuildHasher for FixedState {
type Hasher = FoldHasher;
#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
FoldHasher {
inner: self.inner.build_hasher(),
}
}
}
}
#[cfg(target_has_atomic = "8")]
mod global {
use super::*;
use core::cell::UnsafeCell;
use core::sync::atomic::{AtomicU8, Ordering};
fn generate_global_seed() -> [u64; 4] {
let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);
// Use address space layout randomization as our main randomness source.
// This isn't great, but we don't advertise HashDoS resistance in the first
// place. This is a whole lot better than nothing, at near zero cost with
// no dependencies.
let mut seed = 0;
let stack_ptr = &seed as *const _;
let func_ptr = generate_global_seed;
let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
seed = mix(seed, stack_ptr as usize as u64);
seed = mix(seed, func_ptr as usize as u64);
seed = mix(seed, static_ptr as usize as u64);
// If we have the standard library available, augment entropy with the
// current time and an address from the allocator.
#[cfg(feature = "std")]
{
#[cfg(not(any(
miri,
all(target_family = "wasm", target_os = "unknown"),
target_os = "zkvm"
)))]
if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
seed = mix(seed, duration.subsec_nanos() as u64);
seed = mix(seed, duration.as_secs());
}
let box_ptr = &*Box::new(0u8) as *const _;
seed = mix(seed, box_ptr as usize as u64);
}
let seed_a = mix(seed, 0);
let seed_b = mix(mix(mix(seed_a, 0), 0), 0);
let seed_c = mix(mix(mix(seed_b, 0), 0), 0);
let seed_d = mix(mix(mix(seed_c, 0), 0), 0);
// Zeroes form a weak-point for the multiply-mix, and zeroes tend to be
// a common input. So we want our global seeds that are XOR'ed with the
// input to always be non-zero. To also ensure there is always a good spread
// of bits, we give up 3 bits of entropy and simply force some bits on.
const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
[
seed_a | FORCED_ONES,
seed_b | FORCED_ONES,
seed_c | FORCED_ONES,
seed_d | FORCED_ONES,
]
}
// Now all the below code purely exists to cache the above seed as
// efficiently as possible. Even if we weren't a no_std crate and had access to
// OnceLock, we don't want to check whether the global is set each time we
// hash an object, so we hand-roll a global storage where type safety allows us
// to assume the storage is initialized after construction.
struct GlobalSeedStorage {
state: AtomicU8,
seed: UnsafeCell<[u64; 4]>,
}
const UNINIT: u8 = 0;
const LOCKED: u8 = 1;
const INIT: u8 = 2;
// SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
// LOCKED state, and only read the UnsafeCells when state is in the
// once-achieved-eternally-preserved state INIT.
unsafe impl Sync for GlobalSeedStorage {}
static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
state: AtomicU8::new(UNINIT),
seed: UnsafeCell::new([0; 4]),
};
/// An object representing an initialized global seed.
///
/// Does not actually store the seed inside itself, it is a zero-sized type.
/// This prevents inflating the RandomState size and in turn HashMap's size.
#[derive(Copy, Clone, Debug)]
pub struct GlobalSeed {
// So we can't accidentally type GlobalSeed { } within this crate.
_no_accidental_unsafe_init: (),
}
impl GlobalSeed {
#[inline(always)]
pub fn new() -> Self {
if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
Self::init_slow()
}
Self {
_no_accidental_unsafe_init: (),
}
}
#[cold]
#[inline(never)]
fn init_slow() {
// Generate seed outside of critical section.
let seed = generate_global_seed();
loop {
match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
UNINIT,
LOCKED,
Ordering::Relaxed,
Ordering::Acquire,
) {
Ok(_) => unsafe {
// SAFETY: we just acquired an exclusive lock.
*GLOBAL_SEED_STORAGE.seed.get() = seed;
GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
return;
},
Err(INIT) => return,
// Yes, it's a spin loop. We need to support no_std (so no easy
// access to proper locks), this is a one-time-per-program
// initialization, and the critical section is only a few
// store instructions, so it'll be fine.
_ => core::hint::spin_loop(),
}
}
}
#[inline(always)]
pub fn get(self) -> &'static [u64; 4] {
// SAFETY: our constructor ensured we are in the INIT state and thus
// this raw read does not race with any write.
unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
}
}
}
#[cfg(not(target_has_atomic = "8"))]
mod global {
#[derive(Copy, Clone, Debug)]
pub struct GlobalSeed {}
impl GlobalSeed {
#[inline(always)]
pub fn new() -> Self {
Self {}
}
#[inline(always)]
pub fn get(self) -> &'static [u64; 4] {
&super::FIXED_GLOBAL_SEED
}
}
}