From 80d561cda34eb6ba37d4451b2ca25b4188d4e170 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 4 Feb 2025 13:27:19 -0800 Subject: [PATCH] Skip a couple unnecessary computations in the final G round. Implement literal of iv[0] used in digest. --- src/shaders/compute.wgsl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 1ee01af..da0dd96 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -20,12 +20,6 @@ struct WORK { }; @group(0) @binding(1) var work: WORK; -/** -* Defined separately from `v0` because the original value is required to -* calculate the digest and compare it to the threshold. -*/ -const BLAKE2B_IV32_0: vec2 = vec2(0xF2BDC900u, 0x6A09E667u); - /** * Used to rotate bits by a fixed amount during G mixing. */ @@ -105,7 +99,7 @@ fn main(id: vec3) { * It is always the "last" compression at this INLEN * v14 = ~v14; */ - var v01: vec4 = vec4(BLAKE2B_IV32_0, 0x84CAA73Bu, 0xBB67AE85u); + var v01: vec4 = vec4(0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u); var v23: vec4 = vec4(0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au); var v45: vec4 = vec4(0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu); var v67: vec4 = vec4(0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u); @@ -1218,10 +1212,10 @@ fn main(id: vec3) { v01 += vec4(Z, m2) + vec4(Z, 0u, u32(v01.z + m2.x < v01.z)); v23 += vec4(Z, m3) + vec4(Z, 0u, u32(v23.z + m3.x < v23.z)); - vFC = ((vFC ^ v01) >> ROTATE_16) | ((vFC ^ v01).yxwz << ROTATE_16); + // vFC = ((vFC ^ v01) >> ROTATE_16) | ((vFC ^ v01).yxwz << ROTATE_16); vDE = ((vDE ^ v23) >> ROTATE_16) | ((vDE ^ v23).yxwz << ROTATE_16); - vAB += vFC + vec4(0u, u32(vAB.x + vFC.x < vAB.x), 0u, u32(vAB.z + vFC.z < vAB.z)); + // vAB += vFC + vec4(0u, u32(vAB.x + vFC.x < vAB.x), 0u, u32(vAB.z + vFC.z < vAB.z)); v89 += vDE + vec4(0u, u32(v89.x + vDE.x < v89.x), 0u, u32(v89.z + vDE.z < v89.z)); // v56 = ((v56 ^ vAB) << ROTATE_1) | ((v56 ^ 
vAB).yxwz >> ROTATE_31); @@ -1241,9 +1235,12 @@ fn main(id: vec3) { ****************************************************************************/ /** - * Set nonce if it passes the threshold and no other thread has set it + * Set nonce if it passes the threshold and no other thread has set it. + * The numeric literal used in the finalization digest is the upper 32 bits of + * the original first initialization vector word `blake2b_IV[0]`, which + * NanoPow stores in vector component `v01.y`. */ - if ((BLAKE2B_IV32_0.y ^ v01.y ^ v89.y) > ubo.threshold) { + if ((0x6A09E667u ^ v01.y ^ v89.y) > ubo.threshold) { let wasFound: u32 = atomicExchange(&work.found, 1u); if (wasFound == 0u) { work.nonce = m0; -- 2.34.1