From c0efb22930916cfd9c4081c8ff3d570e70050c9b Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Mon, 30 Dec 2024 14:35:19 -0800 Subject: [PATCH] Redefine shader ints as uints where applicable, and specify types explicitly when using literal numerals. Restore some basic frame timing to keep track of performance gain or loss. --- src/lib/workers/powgl.ts | 117 +++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/src/lib/workers/powgl.ts b/src/lib/workers/powgl.ts index 5f7733a..97a9d81 100644 --- a/src/lib/workers/powgl.ts +++ b/src/lib/workers/powgl.ts @@ -81,7 +81,7 @@ layout(std140) uniform WORK { // Defined separately from uint v[32] below as the original value is required // to calculate the second uint32 of the digest for threshold comparison -#define BLAKE2B_IV32_1 0x6A09E667u +const uint BLAKE2B_IV32_1 = 0x6A09E667u; // Both buffers represent 16 uint64s as 32 uint32s // because that's what GLSL offers, just like Javascript @@ -111,75 +111,75 @@ uint m[32]; // These are offsets into the input data buffer for each mixing step. // They are multiplied by 2 from the original SIGMA values in // the C reference implementation, which refered to uint64s. -const int SIGMA82[192] = int[192]( - 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30, - 28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6, - 22,16,24,0,10,4,30,26,20,28,6,12,14,2,18,8, - 14,18,6,2,26,24,22,28,4,12,10,20,8,0,30,16, - 18,0,10,14,4,8,20,30,28,2,22,24,12,16,6,26, - 4,24,12,20,0,22,16,6,8,26,14,10,30,28,2,18, - 24,10,2,30,28,26,8,20,0,14,12,6,18,4,16,22, - 26,22,14,28,24,2,6,18,10,0,30,8,16,12,4,20, - 12,30,28,18,22,6,0,16,24,4,26,14,2,8,20,10, - 20,4,16,8,14,12,2,10,30,22,18,28,6,24,26,0, - 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30, - 28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6 +const uint SIGMA82[192] = uint[192]( + 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, + 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u, + 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u, + 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u, + 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u, + 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u, + 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u, + 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u, + 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u, + 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u, + 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, + 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u ); // 64-bit unsigned addition within the compression buffer // Sets v[a,a+1] += b // b0 is the low 32 bits of b, b1 represents the high 32 bits -void add_uint64 (int a, uint b0, uint b1) { +void add_uint64 (uint a, uint b0, uint b1) { uint o0 = v[a] + b0; - uint o1 = v[a + 1] + b1; + uint o1 = v[a+1u] + b1; if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow? o1++; } v[a] = o0; - v[a + 1] = o1; + v[a+1u] = o1; } // G Mixing function -void B2B_G (int a, int b, int c, int d, int ix, int iy) { - add_uint64(a, v[b], v[b+1]); - add_uint64(a, m[ix], m[ix + 1]); +void B2B_G (uint a, uint b, uint c, uint d, uint ix, uint iy) { + add_uint64(a, v[b], v[b+1u]); + add_uint64(a, m[ix], m[ix+1u]); // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits uint xor0 = v[d] ^ v[a]; - uint xor1 = v[d + 1] ^ v[a + 1]; + uint xor1 = v[d+1u] ^ v[a+1u]; v[d] = xor1; - v[d + 1] = xor0; + v[d+1u] = xor0; - add_uint64(c, v[d], v[d+1]); + add_uint64(c, v[d], v[d+1u]); // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits xor0 = v[b] ^ v[c]; - xor1 = v[b + 1] ^ v[c + 1]; - v[b] = (xor0 >> 24) ^ (xor1 << 8); - v[b + 1] = (xor1 >> 24) ^ (xor0 << 8); + xor1 = v[b+1u] ^ v[c+1u]; + v[b] = (xor0 >> 24u) ^ (xor1 << 8u); + v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u); - add_uint64(a, v[b], v[b+1]); - add_uint64(a, m[iy], m[iy + 1]); + add_uint64(a, v[b], v[b+1u]); + add_uint64(a, m[iy], m[iy+1u]); // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits xor0 = v[d] ^ v[a]; - xor1 = v[d + 1] ^ v[a + 1]; - v[d] = (xor0 >> 16) ^ (xor1 << 16); - v[d + 1] = (xor1 >> 16) ^ (xor0 << 16); + xor1 = v[d+1u] ^ v[a+1u]; + v[d] = (xor0 >> 16u) ^ (xor1 << 16u); + v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u); - add_uint64(c, v[d], v[d+1]); + add_uint64(c, v[d], v[d+1u]); // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits xor0 = v[b] ^ v[c]; - xor1 = v[b + 1] ^ v[c + 1]; - v[b] = (xor1 >> 31) ^ (xor0 << 1); - v[b + 1] = (xor0 >> 31) ^ (xor1 << 1); + xor1 = v[b+1u] ^ v[c+1u]; + v[b] = (xor1 >> 31u) ^ (xor0 << 1u); + v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u); } void main() { int i; - uvec4 u_work0 = work[0]; - uvec4 u_work1 = work[1]; + uvec4 u_work0 = work[0u]; + uvec4 u_work1 = work[1u]; uint uv_x = uint(uv_pos.x * workload); uint uv_y = uint(uv_pos.y * workload); uint x_pos = uv_x % 256u; @@ -190,36 +190,36 @@ void main() { // First 2 work bytes are the x,y pos within the 256x256 area, the next // two bytes are modified from the random generated value, XOR'd with // the x,y area index of where this pixel is located - m[0] = (x_pos ^ (y_pos << 8) ^ ((u_work0.b ^ x_index) << 16) ^ ((u_work0.a ^ y_index) << 24)); + m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u)); // Remaining bytes are un-modified from the random generated value - m[1] = (u_work1.r ^ (u_work1.g << 8) ^ (u_work1.b << 16) ^ (u_work1.a << 24)); + m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u)); // Block hash - for (i=0;i<8;i++) { - m[i+2] = blockhash[i]; + for (uint i = 0u; i < 8u; i = i + 1u) { + m[i+2u] = blockhash[i]; } // twelve rounds of mixing - for(i=0;i<12;i++) { - B2B_G(0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]); - B2B_G(2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]); - B2B_G(4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]); - B2B_G(6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]); - B2B_G(0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]); - B2B_G(2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]); - B2B_G(4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]); - B2B_G(6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]); + for(uint i = 0u; i < 12u; i = i + 1u) { + B2B_G(0u, 8u, 16u, 24u, SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u]); + B2B_G(2u, 10u, 18u, 26u, SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u]); + B2B_G(4u, 12u, 20u, 28u, SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u]); + B2B_G(6u, 14u, 22u, 30u, SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u]); + B2B_G(0u, 10u, 20u, 30u, SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u]); + B2B_G(2u, 12u, 22u, 24u, SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u]); + B2B_G(4u, 14u, 16u, 26u, SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u]); + B2B_G(6u, 8u, 18u, 28u, SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u]); } // Pixel data is multipled by threshold test result (0 or 1) // First 4 bytes insignificant, only calculate digest of second 4 bytes - if ((BLAKE2B_IV32_1 ^ v[1] ^ v[17]) > threshold) { + if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold) { fragColor = vec4( - float(x_index + 1u)/255., // +1 to distinguish from 0 (unsuccessful) pixels - float(y_index + 1u)/255., // Same as previous - float(x_pos)/255., // Return the 2 custom bytes used in work value - float(y_pos)/255. // Second custom byte + float(x_index + 1u)/255.0, // +1 to distinguish from 0 (unsuccessful) pixels + float(y_index + 1u)/255.0, // Same as previous + float(x_pos)/255.0, // Return the 2 custom bytes used in work value + float(y_pos)/255.0 // Second custom byte ); } else { discard; @@ -341,7 +341,9 @@ void main() { // Draw output until success or progressCallback says to stop const work = new Uint8Array(8) + let start: DOMHighResTimeStamp const draw = (): void => { + start = performance.now() if (Pow.#gl == null) throw new Error('WebGL 2 is required') if (Pow.#query == null) throw new Error('WebGL 2 is required to run queries') Pow.#gl.clear(Pow.#gl.COLOR_BUFFER_BIT) @@ -362,16 +364,20 @@ void main() { function checkQueryResult () { if (Pow.#gl == null) throw new Error('WebGL 2 is required to check query results') if (Pow.#query == null) throw new Error('Query not found') + console.log(`checking (${performance.now() - start} ms)`) if (Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT_AVAILABLE)) { + console.log(`AVAILABLE (${performance.now() - start} ms)`) const anySamplesPassed = Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT) if (anySamplesPassed) { // A valid nonce was found readBackResult() } else { + console.log(`not found (${performance.now() - start} ms)`) // No valid nonce found, start the next draw call requestAnimationFrame(draw) } } else { + console.log(`not ready (${performance.now() - start} ms)`) // Query result not yet available, check again in the next frame requestAnimationFrame(checkQueryResult) } @@ -382,6 +388,7 @@ void main() { // Check the pixels for any success for (let i = 0; i < Pow.#pixels.length; i += 4) { if (Pow.#pixels[i] !== 0) { + console.log(`FOUND (${performance.now() - start} ms)`) const hex = Pow.#hexify(work.subarray(4, 8)) + Pow.#hexify([ Pow.#pixels[i + 2], Pow.#pixels[i + 3], -- 2.34.1