Skip to content

Commit

Permalink
TKSS-198: Enhance SM4-GCM performance with precomputed tables
Browse files Browse the repository at this point in the history
  • Loading branch information
johnshajiang committed May 8, 2023
1 parent bf87be3 commit 36e374b
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 71 deletions.
11 changes: 11 additions & 0 deletions kona-crypto/src/main/java/com/tencent/kona/crypto/CryptoUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,17 @@ public static byte[] bigIntToBytes32(BigInteger value) {
return bytes32;
}

/**
 * Writes a 64-bit value into {@code dest} as two consecutive 4-byte groups:
 * the upper 32 bits at {@code offset}, the lower 32 bits at {@code offset + 4}.
 * The byte layout inside each group is whatever {@code intToBytes4} produces.
 *
 * @param value  the value to serialize
 * @param dest   the destination array
 * @param offset the index in {@code dest} at which the first group starts
 */
public static void longToBytes8(long value, byte[] dest, int offset) {
    int upperHalf = (int) (value >>> 32);
    // A narrowing cast keeps exactly the low 32 bits.
    int lowerHalf = (int) value;

    intToBytes4(upperHalf, dest, offset);
    intToBytes4(lowerHalf, dest, offset + 4);
}

/**
 * Reads 8 bytes starting at {@code offset} as a 64-bit value: the 4-byte
 * group at {@code offset} supplies the upper 32 bits and the group at
 * {@code offset + 4} supplies the lower 32 bits. Each group is decoded by
 * {@code bytes4ToInt}.
 *
 * @param bytes8 the source array
 * @param offset the index in {@code bytes8} at which the first group starts
 * @return the combined 64-bit value
 */
public static long bytes8ToLong(byte[] bytes8, int offset) {
    // Any sign-extension bits of the first group are shifted out by << 32.
    long upper = ((long) bytes4ToInt(bytes8, offset)) << 32;
    // Mask the second group so that a negative int does not smear the upper half.
    long lower = bytes4ToInt(bytes8, offset + 4) & 0xFFFFFFFFL;
    return upper | lower;
}

public static byte[] copy(byte[] data, int offset, int length) {
RangeUtil.nullAndBoundsCheck(data, offset, length);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.tencent.kona.crypto.provider;

import static com.tencent.kona.crypto.CryptoUtils.bytes8ToLong;

/**
* Abstract base class for Galois field multiplication by a fixed subkey.
*/
abstract class GFMultiplier {

    // The 16-byte subkey held as two 64-bit words:
    // [0] is decoded from bytes 0..7, [1] from bytes 8..15.
    final long[] subkeyWords;

    /**
     * Converts the 16-byte subkey into its two-word form.
     *
     * @param subkeyH the subkey; bytes 0..15 are read
     */
    GFMultiplier(byte[] subkeyH) {
        subkeyWords = new long[] {
                bytes8ToLong(subkeyH, 0),
                bytes8ToLong(subkeyH, 8)
        };
    }

    /**
     * Multiplies {@code block} by the subkey. The visible implementations
     * read {@code block[0]} and {@code block[1]} and overwrite them with
     * the product.
     *
     * @param block the two-word operand, replaced by the result
     */
    abstract void multiply(long[] block);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package com.tencent.kona.crypto.provider;

import static com.tencent.kona.crypto.CryptoUtils.longToBytes8;

/**
* Implementations of multiplication over GF(2^128).
*
* Refer to The Galois/Counter Mode of Operation (GCM) [GCMO]
* https://csrc.nist.rip/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf
*/
class GFMultipliers {

    // [GCMO] section 2.5, R = 1110 0001 0000 .... 0000
    private static final long R = 0xE100000000000000L;

    /**
     * Creates a multiplier that computes each product bit by bit and keeps
     * no precomputed table.
     *
     * @param subkeyH the 16-byte subkey
     * @return the table-free multiplier
     */
    static GFMultiplier gfmWithoutPreTable(byte[] subkeyH) {
        return new GFMWithoutPreTable(subkeyH);
    }

    /**
     * Creates a multiplier backed by a 256-entry table that is derived
     * from the subkey once, at construction time.
     *
     * @param subkeyH the 16-byte subkey
     * @return the table-driven multiplier
     */
    static GFMultiplier gfmWith32KPreTable(byte[] subkeyH) {
        return new GFMWith32KTable(subkeyH);
    }

    // Without precomputed table
    private static final class GFMWithoutPreTable extends GFMultiplier {

        private GFMWithoutPreTable(byte[] subkeyH) {
            super(subkeyH);
        }

        /**
         * Multiplies {@code block} by the subkey in place, consuming the
         * operand one bit at a time from the most significant bit of
         * {@code block[0]} to the least significant bit of {@code block[1]}.
         * Bit selection and reduction use all-ones/all-zero masks rather
         * than branches.
         */
        @Override
        public void multiply(long[] block) {
            long Z0 = 0;
            long Z1 = 0;
            long V0 = subkeyWords[0];
            long V1 = subkeyWords[1];
            long X;

            // Separate loops for processing block[0] and block[1].
            X = block[0];
            for (int i = 0; i < 64; i++) {
                // Zi+1 = Zi if bit i of x is 0
                // (X >> 63 is all ones when the top bit of X is set, else 0)
                long mask = X >> 63;
                Z0 ^= V0 & mask;
                Z1 ^= V1 & mask;

                // Save mask for conditional reduction below:
                // all ones when the lowest bit of V is about to be shifted out.
                mask = (V1 << 63) >> 63;

                // V = rightshift(V)
                long carry = V0 & 1;
                V0 = V0 >>> 1;
                V1 = (V1 >>> 1) | (carry << 63);

                // Conditional reduction modulo P128.
                V0 ^= R & mask;
                X <<= 1;
            }

            // Only 63 iterations here: the final operand bit is handled
            // after the loop, where V no longer needs to be advanced.
            X = block[1];
            for (int i = 64; i < 127; i++) {
                // Zi+1 = Zi if bit i of x is 0
                long mask = X >> 63;
                Z0 ^= V0 & mask;
                Z1 ^= V1 & mask;

                // Save mask for conditional reduction below.
                mask = (V1 << 63) >> 63;

                // V = rightshift(V)
                long carry = V0 & 1;
                V0 = V0 >>> 1;
                V1 = (V1 >>> 1) | (carry << 63);

                // Conditional reduction.
                V0 ^= R & mask;
                X <<= 1;
            }

            // calculate Z128
            long mask = X >> 63;
            Z0 ^= V0 & mask;
            Z1 ^= V1 & mask;

            // Save result.
            block[0] = Z0;
            block[1] = Z1;
        }
    }

    // With a precomputed table,
    // which consumes 256 * 2 * 64 = 32768 or 32K bits
    private static final class GFMWith32KTable extends GFMultiplier {

        // table[b] holds the product of the subkey and the 8-bit polynomial
        // whose coefficients are the bits of b; never modified after
        // construction.
        private final long[][] table;

        GFMWith32KTable(byte[] subkeyH) {
            super(subkeyH);
            // Built explicitly after super() so that subkeyWords is
            // visibly initialized before the table is derived from it.
            table = preTable();
        }

        /**
         * Builds the 256-entry table from {@code subkeyWords}.
         * Entry 0 stays zero, entry 1 is the subkey times P^7, every even
         * entry n is entry n/2 divided by P, and every odd entry n is
         * entry n-1 plus entry 1.
         */
        private long[][] preTable() {
            long[][] table = new long[256][2];

            table[1][0] = subkeyWords[0];
            table[1][1] = subkeyWords[1];
            multiplyP7(table[1]);

            for (int i = 2; i < 256; i += 2) {
                divideP(table[i >> 1], table[i]);
                add(table[i], table[1], table[i + 1]);
            }

            return table;
        }

        // Multiplies x by P^7 in place: a 7-bit right shift across both
        // words, with the 7 bits shifted out of x[1] folded back via R.
        private static void multiplyP7(long[] x) {
            long x0 = x[0];
            long x1 = x[1];

            long c = x1 << 57;
            x[0] = (x0 >>> 7) ^ c ^ (c >>> 1) ^ (c >>> 2) ^ (c >>> 7);
            x[1] = (x1 >>> 7) | (x0 << 57);
        }

        // Stores x divided by P into z: a 1-bit left shift across both
        // words; when the top bit of x[0] is set, R is removed first and
        // the lowest bit of the result is set.
        private static void divideP(long[] x, long[] z) {
            long x0 = x[0];
            long x1 = x[1];

            long m = x0 >> 63;
            x0 ^= m & R;
            z[0] = (x0 << 1) | (x1 >>> 63);
            z[1] = (x1 << 1) | -m;
        }

        // Field addition is XOR: z = x + y.
        private static void add(long[] x, long[] y, long[] z) {
            z[0] = x[0] ^ y[0];
            z[1] = x[1] ^ y[1];
        }

        /**
         * Multiplies {@code block} by the subkey in place using one table
         * lookup per operand byte. The bytes are taken from byte 15 (the
         * lowest byte of {@code block[1]}) down to byte 0 (the highest
         * byte of {@code block[0]}); before each lookup is added, the
         * accumulator is shifted right by 8 bits and the byte that falls
         * off is folded back in via R.
         *
         * <p>The operand bytes are read straight from the two longs in
         * big-endian order, so no temporary array is allocated per block.
         */
        @Override
        public void multiply(long[] block) {
            final long x0 = block[0];
            final long x1 = block[1];

            // Byte 15 is the lowest byte of the second word.
            long[] t = table[(int) x1 & 0xFF];
            long z0 = t[0];
            long z1 = t[1];

            for (int i = 14; i >= 0; i--) {
                // Byte i lives in word i / 8, at big-endian position i % 8.
                long word = (i < 8) ? x0 : x1;
                int shift = (7 - (i & 7)) << 3;
                t = table[(int) (word >>> shift) & 0xFF];

                long c = z1 << 56;
                z1 = t[1] ^ ((z1 >>> 8) | (z0 << 56));
                z0 = t[0] ^ (z0 >>> 8) ^ c ^ (c >>> 1) ^ (c >>> 2) ^ (c >>> 7);
            }

            block[0] = z0;
            block[1] = z1;
        }
    }
}
103 changes: 32 additions & 71 deletions kona-crypto/src/main/java/com/tencent/kona/crypto/provider/GHASH.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@
package com.tencent.kona.crypto.provider;

import com.tencent.kona.crypto.util.Constants;
import com.tencent.kona.sun.security.action.GetPropertyAction;

import java.nio.ByteBuffer;
import java.security.ProviderException;

import static com.tencent.kona.crypto.CryptoUtils.longToBytes8;

/**
* This class represents the GHASH function defined in NIST 800-38D
* under section 6.4. It needs to be constructed w/ a hash subkey, i.e.
Expand All @@ -47,6 +50,11 @@

final class GHASH implements Cloneable, GCM {

// preTableSize: ZERO, 32K
// Read once from the system property below; defaults to "32K" when the
// property is not set. The value "32K" (compared case-insensitively)
// selects the multiplier with the precomputed table; any other value,
// e.g. ZERO, selects the multiplier without a table.
private static final String PRE_TABLE_SIZE
= GetPropertyAction.privilegedGetProperty(
"com.tencent.kona.crypto.gcm.preTableSize", "32K");

private static long getLong(byte[] buffer, int offset) {
long result = 0;
int end = offset + 8;
Expand All @@ -69,66 +77,6 @@ private static void putLong(byte[] buffer, int offset, long value) {
// Maximum buffer size rotating ByteBuffer->byte[] intrinsic copy
private static final int MAX_LEN = 1024;

// Multiplies state[0], state[1] by subkeyH[0], subkeyH[1].
// The product is computed one operand bit at a time, from the most
// significant bit of st[0] to the least significant bit of st[1], and is
// written back into st[0], st[1]. subH is only read.
// Bit tests are done with all-ones/all-zero masks instead of branches.
private static void blockMult(long[] st, long[] subH) {
long Z0 = 0;
long Z1 = 0;
long V0 = subH[0];
long V1 = subH[1];
long X;

// Separate loops for processing state[0] and state[1].
X = st[0];
for (int i = 0; i < 64; i++) {
// Zi+1 = Zi if bit i of x is 0
// (X >> 63 is all ones when the top bit of X is set, otherwise 0)
long mask = X >> 63;
Z0 ^= V0 & mask;
Z1 ^= V1 & mask;

// Save mask for conditional reduction below.
// (all ones when the lowest bit of V is about to be shifted out)
mask = (V1 << 63) >> 63;

// V = rightshift(V)
long carry = V0 & 1;
V0 = V0 >>> 1;
V1 = (V1 >>> 1) | (carry << 63);

// Conditional reduction modulo P128.
V0 ^= 0xe100000000000000L & mask;
X <<= 1;
}

// Only 63 iterations: the last operand bit is handled after the loop,
// where V no longer needs to be advanced.
X = st[1];
for (int i = 64; i < 127; i++) {
// Zi+1 = Zi if bit i of x is 0
long mask = X >> 63;
Z0 ^= V0 & mask;
Z1 ^= V1 & mask;

// Save mask for conditional reduction below.
mask = (V1 << 63) >> 63;

// V = rightshift(V)
long carry = V0 & 1;
V0 = V0 >>> 1;
V1 = (V1 >>> 1) | (carry << 63);

// Conditional reduction.
V0 ^= 0xe100000000000000L & mask;
X <<= 1;
}

// calculate Z128
long mask = X >> 63;
Z0 ^= V0 & mask;
Z1 ^= V1 & mask;

// Save result.
st[0] = Z0;
st[1] = Z1;

}

/* subkeyHtbl and state are stored in long[] for GHASH intrinsic use */

// hashtable subkeyHtbl holds 2*9 powers of subkeyH computed using
Expand All @@ -141,6 +89,8 @@ private static void blockMult(long[] st, long[] subH) {
// variables for save/restore calls
private long stateSave0, stateSave1;

private final GFMultiplier multiplier;

/**
* Initializes the cipher in the specified mode with the given key
* and iv.
Expand All @@ -158,12 +108,27 @@ private static void blockMult(long[] st, long[] subH) {
subkeyHtbl = new long[2*9];
subkeyHtbl[0] = getLong(subkeyH, 0);
subkeyHtbl[1] = getLong(subkeyH, 8);

multiplier = multiplier(subkeyH);
}

// Cloning constructor
// Copies the mutable arrays (state and subkeyHtbl) so the clone evolves
// independently of the source instance.
private GHASH(GHASH g) {
    state = g.state.clone();
    subkeyHtbl = g.subkeyHtbl.clone();

    // The multiplier depends only on the subkey and is never modified
    // after construction, so the clone can share it instead of
    // re-serializing the subkey and rebuilding the precomputed table.
    multiplier = g.multiplier;
}

/**
 * Chooses the multiplier implementation according to PRE_TABLE_SIZE:
 * the table-driven one when the setting is "32K" (case-insensitive),
 * otherwise the one without a precomputed table.
 *
 * @param subkeyH the 16-byte hash subkey
 * @return the multiplier for that subkey
 */
private GFMultiplier multiplier(byte[] subkeyH) {
    boolean usePreTable = "32K".equalsIgnoreCase(PRE_TABLE_SIZE);
    return usePreTable
            ? GFMultipliers.gfmWith32KPreTable(subkeyH)
            : GFMultipliers.gfmWithoutPreTable(subkeyH);
}

@Override
Expand Down Expand Up @@ -197,11 +162,10 @@ void restore() {
state[1] = stateSave1;
}

private static void processBlock(byte[] data, int ofs, long[] st,
long[] subH) {
private void processBlock(byte[] data, int ofs, long[] st) {
st[0] ^= getLong(data, ofs);
st[1] ^= getLong(data, ofs + 8);
blockMult(st, subH);
multiplier.multiply(st);
}

int update(byte[] in) {
Expand All @@ -214,7 +178,7 @@ int update(byte[] in, int inOfs, int inLen) {
}
int len = inLen - (inLen % SM4_BLOCK_SIZE);
ghashRangeCheck(in, inOfs, len, state, subkeyHtbl);
processBlocks(in, inOfs, len / SM4_BLOCK_SIZE, state, subkeyHtbl);
processBlocks(in, inOfs, len / SM4_BLOCK_SIZE, state);
return len;
}

Expand Down Expand Up @@ -312,11 +276,10 @@ private static void ghashRangeCheck(byte[] in, int inOfs, int inLen,
* the hotspot signature. This method and methods called by it, cannot
* throw exceptions or allocate arrays, as doing so would break intrinsics
*/
private static void processBlocks(byte[] data, int inOfs, int blocks,
long[] st, long[] subH) {
private void processBlocks(byte[] data, int inOfs, int blocks, long[] st) {
int offset = inOfs;
while (blocks > 0) {
processBlock(data, offset, st, subH);
processBlock(data, offset, st);
blocks--;
offset += SM4_BLOCK_SIZE;
}
Expand All @@ -327,15 +290,13 @@ private void processBlocksDirect(ByteBuffer ct, int inLen) {
byte[] data = new byte[Math.min(MAX_LEN, inLen)];
while (inLen > MAX_LEN) {
ct.get(data, 0, MAX_LEN);
processBlocks(data, 0, MAX_LEN / SM4_BLOCK_SIZE, state,
subkeyHtbl);
processBlocks(data, 0, MAX_LEN / SM4_BLOCK_SIZE, state);
inLen -= MAX_LEN;
}
if (inLen >= SM4_BLOCK_SIZE) {
int len = inLen - (inLen % SM4_BLOCK_SIZE);
ct.get(data, 0, len);
processBlocks(data, 0, len / SM4_BLOCK_SIZE, state,
subkeyHtbl);
processBlocks(data, 0, len / SM4_BLOCK_SIZE, state);
}
}

Expand Down

0 comments on commit 36e374b

Please sign in to comment.