Skip to content

Commit

Permalink
core: remove cached hash code for ItemId (#1696)
Browse files Browse the repository at this point in the history
The id is generated from a hash function, typically SHA1 so
we can just use a part of that for the hash code of the id
object. Since data is sometimes partitioned and routed based
on the most and least significant bits, we pick one of the
middle bytes to reduce the liklihood of collisions.

Benchmarks show it is about 25% slower to access the hash
code this way, but it does provide some memory savings and
compute savings when creating an id instance since the murmur
hash doesn't need to be computed.
  • Loading branch information
brharrington authored Sep 27, 2024
1 parent 8c8edfc commit 7474caa
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 55 deletions.
44 changes: 24 additions & 20 deletions atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,37 @@
package com.netflix.atlas.core.model

import java.math.BigInteger

import com.netflix.atlas.core.util.Strings

import scala.util.hashing.MurmurHash3
import java.lang.invoke.MethodHandles
import java.nio.ByteOrder

/**
* Represents an identifier for a tagged item.
* Represents an identifier for a tagged item. If using for a hash map the
* bytes used for the id should come from a decent hash function as 4 bytes
* from the middle are used for the hash code of the id object.
*
* @param data
* Bytes for the id. This is usually the results of computing a SHA1 hash
* over a normalized representation of the tags.
* @param hc
* Precomputed hash code for the bytes.
*/
class ItemId private (private val data: Array[Byte], private val hc: Int)
extends Comparable[ItemId] {
class ItemId private (private val data: Array[Byte]) extends Comparable[ItemId] {

// Typically it should be 20 bytes for SHA1. Require at least 16 to avoid
// checks for other operations.
require(data.length >= 16)

override def hashCode(): Int = hc
override def hashCode(): Int = {
// Choose middle byte. The id should be generated using decent hash
// function so in theory any subset will do. In some cases data is
// routed based on the prefix or a modulo of the intValue. Choosing a
// byte toward the middle helps to mitigate that.
ItemId.intHandle.get(data, 12)
}

override def equals(obj: Any): Boolean = {
obj match {
case other: ItemId => hc == other.hc && java.util.Arrays.equals(data, other.data)
case other: ItemId => java.util.Arrays.equals(data, other.data)
case _ => false
}
}
Expand All @@ -60,21 +69,16 @@ class ItemId private (private val data: Array[Byte], private val hc: Int)
def toBigInteger: BigInteger = new BigInteger(1, data)

def intValue: Int = {
var result = 0
val end = math.max(0, data.length - 4)
var i = data.length - 1
var shift = 0
while (i >= end) {
result |= (data(i) & 0xFF) << shift
i -= 1
shift += 8
}
result
ItemId.intHandle.get(data, data.length - 4)
}
}

object ItemId {

// Helper to access integer from byte array
private val intHandle =
MethodHandles.byteArrayViewVarHandle(classOf[Array[Int]], ByteOrder.BIG_ENDIAN)

private val hexValueForByte = (0 until 256).toArray.map { i =>
Strings.zeroPad(i, 2)
}
Expand All @@ -84,7 +88,7 @@ object ItemId {
* using MurmurHash3.
*/
def apply(data: Array[Byte]): ItemId = {
new ItemId(data, MurmurHash3.bytesHash(data))
new ItemId(data)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,44 +19,48 @@ import com.netflix.atlas.core.util.Hash
import com.netflix.atlas.core.util.Strings
import munit.FunSuite

import scala.util.hashing.MurmurHash3

class ItemIdSuite extends FunSuite {

def testByteArray: Array[Byte] = {
(1 to 20).map(_.toByte).toArray
}

test("create from byte array") {
val bytes = Array(1.toByte, 2.toByte)
val bytes = testByteArray
val id = ItemId(bytes)
assertEquals(id.hashCode(), MurmurHash3.bytesHash(bytes))
assertEquals(id.hashCode(), 219025168)
}

test("equals") {
val id1 = ItemId(Array(1.toByte, 2.toByte))
val id2 = ItemId(Array(1.toByte, 2.toByte))
val id1 = ItemId(testByteArray)
val id2 = ItemId(testByteArray)
assertEquals(id1, id1)
assertEquals(id1, id2)
}

test("not equals") {
val id1 = ItemId(Array(1.toByte, 2.toByte))
val id2 = ItemId(Array(1.toByte, 3.toByte))
val id1 = ItemId(testByteArray)
val bytes = testByteArray
bytes(13) = 3.toByte // perturb byte used with hashing
val id2 = ItemId(bytes)
assertNotEquals(id1, id2)
assertNotEquals(id1.hashCode(), id2.hashCode())
}

test("does not equal wrong object type") {
val id1 = ItemId(Array(1.toByte, 2.toByte))
val id1 = ItemId(testByteArray)
assert(!id1.equals("foo"))
}

test("does not equal null") {
val id1 = ItemId(Array(1.toByte, 2.toByte))
val id1 = ItemId(testByteArray)
assert(id1 != null)
}

test("toString") {
val bytes = Array(1.toByte, 2.toByte)
val bytes = testByteArray
val id = ItemId(bytes)
assertEquals(id.toString, "0102")
assertEquals(id.toString, "0102030405060708090a0b0c0d0e0f1011121314")
}

test("toString sha1 bytes 0") {
Expand All @@ -79,15 +83,15 @@ class ItemIdSuite extends FunSuite {
}

test("from String lower") {
val bytes = Array(10.toByte, 11.toByte)
val bytes = testByteArray
val id = ItemId(bytes)
assertEquals(id, ItemId("0a0b"))
assertEquals(id, ItemId("0102030405060708090a0b0c0d0e0f1011121314"))
}

test("from String upper") {
val bytes = Array(10.toByte, 11.toByte)
val bytes = testByteArray
val id = ItemId(bytes)
assertEquals(id, ItemId("0A0B"))
assertEquals(id, ItemId("0102030405060708090A0B0C0D0E0F1011121314"))
}

test("from String invalid") {
Expand All @@ -97,15 +101,17 @@ class ItemIdSuite extends FunSuite {
}

test("compareTo equals") {
val id1 = ItemId(Array(1.toByte, 2.toByte))
val id2 = ItemId(Array(1.toByte, 2.toByte))
val id1 = ItemId(testByteArray)
val id2 = ItemId(testByteArray)
assertEquals(id1.compareTo(id1), 0)
assertEquals(id1.compareTo(id2), 0)
}

test("compareTo less than/greater than") {
val id1 = ItemId(Array(1.toByte, 2.toByte))
val id2 = ItemId(Array(1.toByte, 3.toByte))
val id1 = ItemId(testByteArray)
val bytes = testByteArray
bytes(bytes.length - 1) = 21.toByte
val id2 = ItemId(bytes)
assert(id1.compareTo(id2) < 0)
assert(id2.compareTo(id1) > 0)
}
Expand All @@ -116,19 +122,4 @@ class ItemIdSuite extends FunSuite {
assertEquals(id.intValue, id.toBigInteger.intValue())
}
}

test("int value: one byte") {
val id = ItemId(Array(57.toByte))
assertEquals(id.intValue, id.toBigInteger.intValue())
}

test("int value: two bytes") {
val id = ItemId(Array(1.toByte, 2.toByte))
assertEquals(id.intValue, id.toBigInteger.intValue())
}

test("int value: three bytes") {
val id = ItemId(Array(1.toByte, 2.toByte, 0xFF.toByte))
assertEquals(id.intValue, id.toBigInteger.intValue())
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright 2014-2024 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.atlas.core.model

import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.Scope
import org.openjdk.jmh.annotations.State
import org.openjdk.jmh.infra.Blackhole

import scala.util.Random

/**
* ```
* > jmh:run -prof gc -prof stack -wi 5 -i 10 -f1 -t1 .*ItemIdHashCode.*
* ```
*
* ItemId.hashCode:
*
* ```
* Benchmark Mode Cnt Score Error Units
* before thrpt 10 1547433250.866 ± 52995197.574 ops/s
* after thrpt 10 1162791300.824 ± 36880550.752 ops/s
* ```
*
*
* ItemId.intValue:
*
* ```
* before thrpt 10 292282926.019 ± 2643333.767 ops/s
* after thrpt 10 1101391907.134 ± 14583585.092 ops/s
* ```
*/
@State(Scope.Thread)
class ItemIdHashCode {

private val id = ItemId(Random.nextBytes(20))

@Benchmark
def intValue(bh: Blackhole): Unit = {
bh.consume(id.intValue)
}

@Benchmark
def hashCode(bh: Blackhole): Unit = {
bh.consume(id.hashCode())
}
}

0 comments on commit 7474caa

Please sign in to comment.