From ee2fce003f5369b193353211b3463138ef3593e8 Mon Sep 17 00:00:00 2001
From: Chris Duncan
Date: Sun, 5 Jan 2025 11:54:25 -0800
Subject: [PATCH] Back up rolled-up shader code.

---
 src/lib/workers/pow.wgsl | 178 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 src/lib/workers/pow.wgsl

diff --git a/src/lib/workers/pow.wgsl b/src/lib/workers/pow.wgsl
new file mode 100644
index 0000000..2509e8e
--- /dev/null
+++ b/src/lib/workers/pow.wgsl
@@ -0,0 +1,178 @@
+struct UBO {
+  blockhash: array<vec4<u32>, 2>,
+  random: u32,
+  threshold: u32
+};
+@group(0) @binding(0) var<uniform> ubo: UBO;
+
+struct WORK {
+  nonce: vec2<u32>,
+  found: atomic<u32>
+};
+@group(0) @binding(1) var<storage, read_write> work: WORK;
+
+/**
+* Defined separately from uint v[32] below as the original value is required
+* to calculate the second uint32 of the digest for threshold comparison
+*/
+const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+
+/**
+* These are offsets into the input data buffer for each mixing step.
+* They are multiplied by 2 from the original SIGMA values in
+* the C reference implementation, which referred to uint64s.
+*/
+const SIGMA82: array<u32, 192> = array<u32, 192>(
+  0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+  28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+  22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+  14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+  18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+  4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+  24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+  26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+  12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+  20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+  0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+  28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+);
+
+/**
+* 64-bit unsigned addition within the compression buffer
+* Sets v[i,i+1] += b
+* LSb is the least-significant 32 bits of b
+* MSb is the most-significant 32 bits of b
+* If LSb overflows, increment the MSb operand
+*/
+fn add_uint64 (v: ptr<function, array<u32, 32>>, i: u32, LSb: u32, MSb: u32) {
+  var o0: u32 = (*v)[i] + LSb;
+  var o1: u32 = (*v)[i+1u] + MSb;
+  if ((*v)[i] > 0xFFFFFFFFu - LSb) {
+    o1 = o1 + 1u;
+  }
+  (*v)[i] = o0;
+  (*v)[i+1u] = o1;
+}
+
+/**
+* G mixing function
+*/
+fn G (v: ptr<function, array<u32, 32>>, m: ptr<function, array<u32, 32>>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) {
+  add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+  add_uint64(v, a, (*m)[ix], (*m)[ix+1u]);
+
+  // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 32 bits
+  var xor0: u32 = (*v)[d] ^ (*v)[a];
+  var xor1: u32 = (*v)[d+1u] ^ (*v)[a+1u];
+  (*v)[d] = xor1;
+  (*v)[d+1u] = xor0;
+
+  add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+  // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
+  xor0 = (*v)[b] ^ (*v)[c];
+  xor1 = (*v)[b+1u] ^ (*v)[c+1u];
+  (*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+  (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
+
+  add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+  add_uint64(v, a, (*m)[iy], (*m)[iy+1u]);
+
+  // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
+  xor0 = (*v)[d] ^ (*v)[a];
+  xor1 = (*v)[d+1u] ^ (*v)[a+1u];
+  (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+  (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
+
+  add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+  // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
+  xor0 = (*v)[b] ^ (*v)[c];
+  xor1 = (*v)[b+1u] ^ (*v)[c+1u];
+  (*v)[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+  (*v)[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+/**
+* Main compute function
+* 8-byte work is split into two 4-byte u32 values. The low 4 bytes are a
+* random u32 from the UBO. The high 4 bytes are that random value XOR'd
+* with the index of each thread.
+*/
+@compute @workgroup_size(${this.workload})
+fn main(
+  @builtin(workgroup_id) workgroup_id: vec3<u32>,
+  @builtin(local_invocation_id) local_id: vec3<u32>
+) {
+  if (atomicLoad(&work.found) != 0u) { return; }
+
+  let threshold: u32 = ubo.threshold;
+
+  /**
+  * Flatten 3D workgroup and local identifiers into a u32 for each thread
+  */
+  var id: u32 = ((workgroup_id.x & 0xff) << 24) |
+    ((workgroup_id.y & 0xff) << 16) |
+    ((workgroup_id.z & 0xff) << 8) |
+    (local_id.x & 0xff);
+
+  /**
+  * Initialize (nonce||blockhash) concatenation
+  */
+  var m: array<u32, 32>;
+  m[0u] = ubo.random;
+  m[1u] = ubo.random ^ id;
+  m[2u] = ubo.blockhash[0u].x;
+  m[3u] = ubo.blockhash[0u].y;
+  m[4u] = ubo.blockhash[0u].z;
+  m[5u] = ubo.blockhash[0u].w;
+  m[6u] = ubo.blockhash[1u].x;
+  m[7u] = ubo.blockhash[1u].y;
+  m[8u] = ubo.blockhash[1u].z;
+  m[9u] = ubo.blockhash[1u].w;
+
+  /**
+  * Compression buffer initialized to 2 instances of the initialization vector
+  * The following values have been modified from the BLAKE2B_IV:
+  * OUTLEN is constant 8 bytes
+  * v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+  * INLEN is constant 40 bytes: work value (8) + block hash (32)
+  * v[24u] ^= uint(INLEN);
+  * It is always the "last" compression at this INLEN
+  * v[28u] = ~v[28u];
+  * v[29u] = ~v[29u];
+  */
+  var v = array<u32, 32>(
+    0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+    0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+    0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+    0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+    0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+    0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+    0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+    0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+  );
+
+  /**
+  * Twelve rounds of mixing as part of the BLAKE2b compression step
+  */
+  for (var r: u32 = 0u; r < 12u; r = r + 1u) {
+    G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]);
+    G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]);
+    G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]);
+    G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]);
+    G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]);
+    G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]);
+    G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]);
+    G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]);
+  }
+
+  /**
+  * Set the nonce if it passes the threshold and no other thread has set it
+  */
+  if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold && atomicLoad(&work.found) == 0u) {
+    atomicStore(&work.found, 1u);
+    work.nonce.x = m[0];
+    work.nonce.y = m[1];
+  }
+  return;
+}
--
2.34.1
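
A note on the add_uint64 carry trick above: WGSL has no native 64-bit integers, so the shader detects overflow of the low word by comparing the original operand against 0xFFFFFFFF - LSb before wrapping. Below is a minimal CPU-side mirror in TypeScript for spot-checking that logic against BigInt arithmetic; the addUint64 name and test values are hypothetical and not part of the patch.

// CPU mirror of the shader's add_uint64: v[i], v[i+1] hold the low and
// high 32 bits of a u64; add (msb << 32 | lsb) with a manual carry.
// (Hypothetical helper, not part of the patch.)
function addUint64 (v: Uint32Array, i: number, lsb: number, msb: number): void {
  const lo = (v[i] + lsb) >>> 0      // low word wraps modulo 2^32
  let hi = (v[i + 1] + msb) >>> 0    // high word wraps modulo 2^32
  if (v[i] > 0xFFFFFFFF - lsb) {     // low-word addition overflowed:
    hi = (hi + 1) >>> 0              // propagate the carry
  }
  v[i] = lo
  v[i + 1] = hi
}

// Spot-check against BigInt: 0x00000001_FFFFFFFF + 1 = 0x00000002_00000000
const v = Uint32Array.of(0xFFFFFFFF, 0x00000001)
addUint64(v, 0, 0x00000001, 0x00000000)
const got = (BigInt(v[1]) << 32n) | BigInt(v[0])
console.assert(got === 0x200000000n, 'carry must propagate into the high word')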
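
For orientation, here is a minimal sketch of the host-side wiring this shader implies, in TypeScript over WebGPU. It assumes a device already acquired from navigator.gpu and a source string with ${this.workload} already substituted; the computeWork name, buffer sizes (derived from the UBO and WORK structs and their alignment padding), and dispatch dimensions are assumptions for illustration, not the project's actual driver code.

// Hypothetical host-side driver for pow.wgsl.
async function computeWork (
  device: GPUDevice,
  source: string,          // WGSL above with ${this.workload} substituted
  blockhash: Uint32Array,  // 8 x u32 = the 32-byte block hash
  threshold: number        // difficulty threshold as u32
): Promise<Uint32Array | null> {
  // UBO struct: blockhash (32 B) + random (4 B) + threshold (4 B),
  // padded to 48 B by vec4 alignment rules
  const ubo = device.createBuffer({
    size: 48,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  })
  // WORK struct: nonce vec2<u32> (8 B) + found atomic<u32> (4 B), padded to 16 B
  const work = device.createBuffer({
    size: 16,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
  })
  const readback = device.createBuffer({
    size: 16,
    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
  })

  const pipeline = device.createComputePipeline({
    layout: 'auto',
    compute: { module: device.createShaderModule({ code: source }), entryPoint: 'main' }
  })
  const bindGroup = device.createBindGroup({
    layout: pipeline.getBindGroupLayout(0),
    entries: [
      { binding: 0, resource: { buffer: ubo } },
      { binding: 1, resource: { buffer: work } }
    ]
  })

  // Seed the low nonce word with fresh randomness for this pass
  const uniforms = new Uint32Array(12)
  uniforms.set(blockhash, 0)
  uniforms[8] = crypto.getRandomValues(new Uint32Array(1))[0]
  uniforms[9] = threshold >>> 0
  device.queue.writeBuffer(ubo, 0, uniforms)

  const encoder = device.createCommandEncoder()
  const pass = encoder.beginComputePass()
  pass.setPipeline(pipeline)
  pass.setBindGroup(0, bindGroup)
  // Each thread flattens (workgroup_id.xyz, local_id.x) into a unique id,
  // so the grid shape just needs to cover enough of that id space
  pass.dispatchWorkgroups(256, 256, 256)
  pass.end()
  encoder.copyBufferToBuffer(work, 0, readback, 0, 16)
  device.queue.submit([encoder.finish()])

  await readback.mapAsync(GPUMapMode.READ)
  const result = new Uint32Array(readback.getMappedRange().slice(0))
  readback.unmap()
  // result = [nonce.x, nonce.y, found, pad]
  return result[2] !== 0 ? result.slice(0, 2) : null
}

Because found is only ever raised, a host loop can re-seed random and redispatch until the readback shows a nonzero flag.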