Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve DOS resistance #88

Draft
wants to merge 2 commits into
base: v4
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ exclude = ["article/*"]
[features]
default = ["std"]
std = []
no-inlining = []
# Only relevant for throughput benchmarks
bench-csv = []
bench-md = []
Expand All @@ -38,6 +39,7 @@ highway = "1.1.0"
seahash = "4.1.0"
metrohash = "1.0.6"
fnv = "1.0.3"
aes_crypto = "1.2.0"

[dev-dependencies.plotters]
version = "0.3.5"
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ All generated hashes for a given version of GxHash are stable, meaning that for
The `std` feature flag enables the `HashMap`/`HashSet` container convenience type aliases. This is on by default. Disable to make the crate `no_std`:

```toml
[dependencies.gxhash]
[dependencies]
...
default-features = false
gxhash = { version = "3", default-features = false, features = ["inlined"] }
```

### `hybrid`
Expand Down Expand Up @@ -121,7 +121,7 @@ GxHash is a non-cryptographic hashing algorithm, thus it is not recommended to u
- Minor for API changes/removal
- Patch for new APIs, bug fixes and performance improvements

> ℹ️ [cargo-show-asm](https://github.com/pacak/cargo-show-asm) is an easy way to view the actual generated assembly code (`cargo asm gxhash::gxhash::gxhash64`) (method `#[inline]` should be removed otherwise it won't be seen by the tool)
> ℹ️ [cargo-show-asm](https://github.com/pacak/cargo-show-asm) is an easy way to view the actual generated assembly code (`cargo asm gxhash::gxhash::gxhash64`). E.g.: `cargo asm gxhash::gxhash::gxhash64 --lib --features no-inlining` (append `| wc -l` to count lines, which is an approximate but quick way to get a sense of the generated code size).
> ℹ️ [AMD μProf](https://www.amd.com/en/developer/uprof.html) gives some useful insights on time spent per instruction.

## Publication
Expand Down
8 changes: 4 additions & 4 deletions benches/hashset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ fn benchmark<T>(c: &mut Criterion, name: &str, value: T)
{
let mut group = c.benchmark_group(format!("HashSet/{}", name));

let mut set = HashSet::<T>::new();
group.bench_function("Default Hasher", |b| {
let mut set: HashSet::<T, GxBuildHasher> = gxhash::HashSet::<T>::default();
group.bench_function("GxHash", |b| {
iterate(b, &value, &mut set);
});

let mut set: HashSet::<T, GxBuildHasher> = gxhash::HashSet::<T>::default();
group.bench_function("GxHash", |b| {
let mut set = HashSet::<T>::new();
group.bench_function("Default Hasher", |b| {
iterate(b, &value, &mut set);
});

Expand Down
39 changes: 39 additions & 0 deletions changes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# GxHash 3.X

Bytecode: 201
HashSet/u32/GxHash: 1.5724 ns
Throughput:
| 4 > 6278.58
| 8 > 12620.74
| 16 > 25315.13
| 32 > 26450.76
| 64 > 39590.37
| 128 > 39402.75
| 256 > 52222.14
| 512 > 63567.70
| 1024 > 71014.10
| 2048 > 74969.55
| 4096 > 80239.42
| 8192 > 83975.67
| 16384 > 82638.05
| 32768 > 84528.13

# GxHash 4 (WIP)

Bytecode: 190
HashSet/u32/GxHash: 1.5426 ns
Throughput:
| 4 > 7360.11
| 8 > 14769.95
| 16 > 29555.45
| 32 > 43083.63
| 64 > 43083.63
| 128 > 40690.10
| 256 > 50511.85
| 512 > 62827.61
| 1024 > 70250.75
| 2048 > 81630.13
| 4096 > 87250.16
| 8192 > 89831.86
| 16384 > 88241.06
| 32768 > 89616.23
88 changes: 76 additions & 12 deletions src/gxhash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub fn gxhash32(input: &[u8], seed: i64) -> u32 {
/// let seed = 1234;
/// println!("Hash is {:x}!", gxhash::gxhash64(&bytes, seed));
/// ```
#[cfg(not(feature = "no-inlining"))]
#[inline(always)]
pub fn gxhash64(input: &[u8], seed: i64) -> u64 {
unsafe {
Expand All @@ -36,6 +37,15 @@ pub fn gxhash64(input: &[u8], seed: i64) -> u64 {
}
}

/// Hashes an arbitrary stream of bytes to an u64.
///
/// This is the variant compiled when the `no-inlining` feature is enabled.
/// It is marked `#[inline(never)]` so that the function is kept out of line,
/// e.g. so its generated assembly can be inspected as a standalone symbol.
///
/// The result is the first 8 bytes (in memory order) of the hash state
/// produced by `gxhash` for `input` and the state derived from `seed`.
#[cfg(feature = "no-inlining")]
#[inline(never)]
pub fn gxhash64(input: &[u8], seed: i64) -> u64 {
    unsafe {
        // Keep the full state in a named local so the pointer below stays valid.
        let state: State = gxhash(input, create_seed(seed));
        // NOTE(review): assumes `State` is at least 8 bytes wide and at least as
        // aligned as `u64` on every platform backend — confirm.
        let leading_u64 = (&state as *const State).cast::<u64>();
        *leading_u64
    }
}

/// Hashes an arbitrary stream of bytes to an u128.
///
/// # Example
Expand Down Expand Up @@ -67,23 +77,23 @@ pub(crate) use load_unaligned;

#[inline(always)]
pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State {
finalize(aes_encrypt(compress_all(input), seed))
finalize(compress_all(input, seed))
}

#[inline(always)]
pub(crate) unsafe fn compress_all(input: &[u8]) -> State {
pub(crate) unsafe fn compress_all(input: &[u8], seed: State) -> State {

let len = input.len();
let mut ptr = input.as_ptr() as *const State;

if len == 0 {
return create_empty();
return seed;
}

if len <= VECTOR_SIZE {
// Input fits on a single SIMD vector, however we might read beyond the input message
// Thus we need this safe method that checks if it can safely read beyond or must copy
return get_partial(ptr, len);
return xor(get_partial(ptr, len), seed);
}

let mut hash_vector: State;
Expand All @@ -102,6 +112,8 @@ pub(crate) unsafe fn compress_all(input: &[u8]) -> State {
ptr = ptr.cast::<u8>().add(extra_bytes_count).cast();
}

hash_vector = xor(hash_vector, seed);

load_unaligned!(ptr, v0);

if len > VECTOR_SIZE * 2 {
Expand Down Expand Up @@ -152,6 +164,8 @@ unsafe fn compress_many(mut ptr: *const State, end: usize, hash_vector: State, l
#[cfg(test)]
mod tests {

use crate::gxhash;

use super::*;
use rand::Rng;

Expand Down Expand Up @@ -213,14 +227,64 @@ mod tests {
assert_ne!(0, gxhash32(&[0u8; 1200], 0));
}

// #[test]
// fn is_stable() {
// assert_eq!(2533353535, gxhash32(&[0u8; 0], 0));
// assert_eq!(4243413987, gxhash32(&[0u8; 1], 0));
// assert_eq!(2401749549, gxhash32(&[0u8; 1000], 0));
// assert_eq!(4156851105, gxhash32(&[42u8; 4242], 42));
// assert_eq!(1981427771, gxhash32(&[42u8; 4242], -42));
// assert_eq!(1156095992, gxhash32(b"Hello World", i64::MAX));
// assert_eq!(540827083, gxhash32(b"Hello World", i64::MIN));
// }

#[test]
fn is_stable() {
assert_eq!(2533353535, gxhash32(&[0u8; 0], 0));
assert_eq!(4243413987, gxhash32(&[0u8; 1], 0));
assert_eq!(2401749549, gxhash32(&[0u8; 1000], 0));
assert_eq!(4156851105, gxhash32(&[42u8; 4242], 42));
assert_eq!(1981427771, gxhash32(&[42u8; 4242], -42));
assert_eq!(1156095992, gxhash32(b"Hello World", i64::MAX));
assert_eq!(540827083, gxhash32(b"Hello World", i64::MIN));
fn issue_83_multicollision() {

let zero_key = aes_crypto::AesBlock::zero();

let mut s0 = [0u8; 192];
let mut s1 = [0u8; 192];

s0[64] = 100;
s1[64] = 42;

let v0 = aes_crypto::AesBlock::new(s0[64..64 + 16].try_into().unwrap());
v0.enc(zero_key).store_to(&mut s0[64 + 32..]);

let v0 = aes_crypto::AesBlock::new(s1[64..64 + 16].try_into().unwrap());
v0.enc(zero_key).store_to(&mut s1[64 + 32..]);

// Different strings.
assert!(s0 != s1);

// Must not collide, regardless of seed.
assert!(gxhash::gxhash128(&s0, 0) != gxhash::gxhash128(&s1, 0));
assert!(gxhash::gxhash128(&s0, 0xdeadbeef) != gxhash::gxhash128(&s1, 0xdeadbeef));
}

/// Regression test named after issue 83, built with the AES *decryption* round.
///
/// Two 192-byte messages that differ only in byte 64 are prepared. For each one,
/// the 16-byte block at offset 64 is passed through `dec` with an all-zero key and
/// the result is written at offset 96. The two messages must hash to different
/// 128-bit values for every seed that is checked.
#[test]
fn issue_83_multicollision_dec() {
    let zero_key = aes_crypto::AesBlock::zero();

    let mut first = [0u8; 192];
    let mut second = [0u8; 192];

    first[64] = 100;
    second[64] = 42;

    // Derive the block at offset 96 from the block at offset 64, for each message.
    let block = aes_crypto::AesBlock::new(first[64..64 + 16].try_into().unwrap());
    block.dec(zero_key).store_to(&mut first[64 + 32..]);

    let block = aes_crypto::AesBlock::new(second[64..64 + 16].try_into().unwrap());
    block.dec(zero_key).store_to(&mut second[64 + 32..]);

    // The inputs themselves are distinct.
    assert!(first != second);

    // The hashes must not collide, whichever seed is used.
    for seed in [0, 0xdeadbeef] {
        assert!(gxhash::gxhash128(&first, seed) != gxhash::gxhash128(&second, seed));
    }
}
}
24 changes: 13 additions & 11 deletions src/gxhash/platform/arm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ pub unsafe fn aes_encrypt_last(data: State, keys: State) -> State {
vreinterpretq_s8_u8(veorq_u8(encrypted, vreinterpretq_u8_s8(keys)))
}

#[inline(always)]
/// Returns the bitwise XOR of the two state vectors `a` and `b`.
///
/// Thin wrapper around the NEON `veorq_s8` intrinsic.
// NOTE(review): declared `unsafe` presumably only because it wraps a SIMD
// intrinsic; confirm whether callers must uphold any invariant.
pub unsafe fn xor(a: State, b: State) -> State {
veorq_s8(a, b)
}

#[inline(always)]
pub unsafe fn ld(array: *const u32) -> State {
vreinterpretq_s8_u32(vld1q_u32(array))
Expand All @@ -72,10 +78,6 @@ pub unsafe fn ld(array: *const u32) -> State {
#[inline(always)]
pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: State, len: usize) -> State {

// Disambiguation vectors
let mut t1: State = create_empty();
let mut t2: State = create_empty();

// Hash is processed in two separate 128-bit parallel lanes
// This allows the same processing to be applied using 256-bit V-AES intrinsics
// so that hashes are stable in both cases.
Expand All @@ -86,20 +88,20 @@ pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector:

crate::gxhash::load_unaligned!(ptr, v0, v1, v2, v3, v4, v5, v6, v7);

let mut tmp1 = aes_encrypt(v0, v2);
let mut tmp2 = aes_encrypt(v1, v3);
let mut tmp1 = aes_encrypt(v0, hash_vector);
let mut tmp2 = aes_encrypt(v1, hash_vector);

tmp1 = aes_encrypt(tmp1, v2);
tmp2 = aes_encrypt(tmp2, v3);

tmp1 = aes_encrypt(tmp1, v4);
tmp2 = aes_encrypt(tmp2, v5);

tmp1 = aes_encrypt(tmp1, v6);
tmp2 = aes_encrypt(tmp2, v7);

t1 = vaddq_s8(t1, ld(KEYS.as_ptr()));
t2 = vaddq_s8(t2, ld(KEYS.as_ptr().offset(4)));

lane1 = aes_encrypt_last(aes_encrypt(tmp1, t1), lane1);
lane2 = aes_encrypt_last(aes_encrypt(tmp2, t2), lane2);
lane1 = aes_encrypt_last(tmp1, lane1);
lane2 = aes_encrypt_last(tmp2, lane2);
}
// For 'Zeroes' test
let len_vec = vreinterpretq_s8_u32(vdupq_n_u32(len as u32));
Expand Down
4 changes: 2 additions & 2 deletions src/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ macro_rules! write {
#[inline]
fn $name(&mut self, value: $type) {
self.state = unsafe {
aes_encrypt_last($load(value), aes_encrypt(self.state, ld(KEYS.as_ptr())))
aes_encrypt(self.state, $load(value))
};
}
}
Expand All @@ -112,7 +112,7 @@ impl Hasher for GxHasher {
#[inline]
fn write(&mut self, bytes: &[u8]) {
// Improvement: only compress at this stage and finalize in finish
self.state = unsafe { aes_encrypt_last(compress_all(bytes), aes_encrypt(self.state, ld(KEYS.as_ptr()))) };
self.state = unsafe { compress_all(bytes, self.state) };
}

write!(write_u8, u8, load_u8);
Expand Down