Redefine shader ints as uints where applicable, and specify types explicitly when...

author Chris Duncan <chris@zoso.dev>

Mon, 30 Dec 2024 22:35:19 +0000 (14:35 -0800)

committer Chris Duncan <chris@zoso.dev>

Mon, 30 Dec 2024 22:35:19 +0000 (14:35 -0800)
author Chris Duncan <chris@zoso.dev>
Mon, 30 Dec 2024 22:35:19 +0000 (14:35 -0800)
committer Chris Duncan <chris@zoso.dev>
Mon, 30 Dec 2024 22:35:19 +0000 (14:35 -0800)
diff --git a/src/lib/workers/powgl.ts b/src/lib/workers/powgl.ts

index 5f7733a40fec84343f6e9dc5d5f02ecdebec0a3f..97a9d8142de37b5e30142473d875bec3bda1835d 100644 (file)
--- a/src/lib/workers/powgl.ts
+++ b/src/lib/workers/powgl.ts
@@ -81,7 +81,7 @@ layout(std140) uniform WORK {
  
  // Defined separately from uint v[32] below as the original value is required
  // to calculate the second uint32 of the digest for threshold comparison
-#define BLAKE2B_IV32_1 0x6A09E667u
+const uint BLAKE2B_IV32_1 = 0x6A09E667u;
  
  // Both buffers represent 16 uint64s as 32 uint32s
  // because that's what GLSL offers, just like Javascript
@@ -111,75 +111,75 @@ uint m[32];
  // These are offsets into the input data buffer for each mixing step.
  // They are multiplied by 2 from the original SIGMA values in
  // the C reference implementation, which referred to uint64s.
-const int SIGMA82[192] = int[192](
-       0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
-       28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6,
-       22,16,24,0,10,4,30,26,20,28,6,12,14,2,18,8,
-       14,18,6,2,26,24,22,28,4,12,10,20,8,0,30,16,
-       18,0,10,14,4,8,20,30,28,2,22,24,12,16,6,26,
-       4,24,12,20,0,22,16,6,8,26,14,10,30,28,2,18,
-       24,10,2,30,28,26,8,20,0,14,12,6,18,4,16,22,
-       26,22,14,28,24,2,6,18,10,0,30,8,16,12,4,20,
-       12,30,28,18,22,6,0,16,24,4,26,14,2,8,20,10,
-       20,4,16,8,14,12,2,10,30,22,18,28,6,24,26,0,
-       0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
-       28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6
+const uint SIGMA82[192] = uint[192](
+       0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+       28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+       22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+       14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+       18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+       4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+       24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+       26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+       12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+       20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+       0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+       28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
  );
  
  // 64-bit unsigned addition within the compression buffer
  // Sets v[a,a+1] += b
  // b0 is the low 32 bits of b, b1 represents the high 32 bits
-void add_uint64 (int a, uint b0, uint b1) {
+void add_uint64 (uint a, uint b0, uint b1) {
         uint o0 = v[a] + b0;
-       uint o1 = v[a + 1] + b1;
+       uint o1 = v[a+1u] + b1;
         if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow?
                 o1++;
         }
         v[a] = o0;
-       v[a + 1] = o1;
+       v[a+1u] = o1;
  }
  
  // G Mixing function
-void B2B_G (int a, int b, int c, int d, int ix, int iy) {
-       add_uint64(a, v[b], v[b+1]);
-       add_uint64(a, m[ix], m[ix + 1]);
+void B2B_G (uint a, uint b, uint c, uint d, uint ix, uint iy) {
+       add_uint64(a, v[b], v[b+1u]);
+       add_uint64(a, m[ix], m[ix+1u]);
  
         // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
         uint xor0 = v[d] ^ v[a];
-       uint xor1 = v[d + 1] ^ v[a + 1];
+       uint xor1 = v[d+1u] ^ v[a+1u];
         v[d] = xor1;
-       v[d + 1] = xor0;
+       v[d+1u] = xor0;
  
-       add_uint64(c, v[d], v[d+1]);
+       add_uint64(c, v[d], v[d+1u]);
  
         // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
         xor0 = v[b] ^ v[c];
-       xor1 = v[b + 1] ^ v[c + 1];
-       v[b] = (xor0 >> 24) ^ (xor1 << 8);
-       v[b + 1] = (xor1 >> 24) ^ (xor0 << 8);
+       xor1 = v[b+1u] ^ v[c+1u];
+       v[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+       v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
  
-       add_uint64(a, v[b], v[b+1]);
-       add_uint64(a, m[iy], m[iy + 1]);
+       add_uint64(a, v[b], v[b+1u]);
+       add_uint64(a, m[iy], m[iy+1u]);
  
         // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
         xor0 = v[d] ^ v[a];
-       xor1 = v[d + 1] ^ v[a + 1];
-       v[d] = (xor0 >> 16) ^ (xor1 << 16);
-       v[d + 1] = (xor1 >> 16) ^ (xor0 << 16);
+       xor1 = v[d+1u] ^ v[a+1u];
+       v[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+       v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
  
-       add_uint64(c, v[d], v[d+1]);
+       add_uint64(c, v[d], v[d+1u]);
  
         // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
         xor0 = v[b] ^ v[c];
-       xor1 = v[b + 1] ^ v[c + 1];
-       v[b] = (xor1 >> 31) ^ (xor0 << 1);
-       v[b + 1] = (xor0 >> 31) ^ (xor1 << 1);
+       xor1 = v[b+1u] ^ v[c+1u];
+       v[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+       v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
  }
  
  void main() {
         int i;
-       uvec4 u_work0 = work[0];
-       uvec4 u_work1 = work[1];
+       uvec4 u_work0 = work[0u];
+       uvec4 u_work1 = work[1u];
         uint uv_x = uint(uv_pos.x * workload);
         uint uv_y = uint(uv_pos.y * workload);
         uint x_pos = uv_x % 256u;
@@ -190,36 +190,36 @@ void main() {
         // First 2 work bytes are the x,y pos within the 256x256 area, the next
         // two bytes are modified from the random generated value, XOR'd with
         // the x,y area index of where this pixel is located
-       m[0] = (x_pos ^ (y_pos << 8) ^ ((u_work0.b ^ x_index) << 16) ^ ((u_work0.a ^ y_index) << 24));
+       m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u));
  
         // Remaining bytes are un-modified from the random generated value
-       m[1] = (u_work1.r ^ (u_work1.g << 8) ^ (u_work1.b << 16) ^ (u_work1.a << 24));
+       m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u));
  
         // Block hash
-       for (i=0;i<8;i++) {
-               m[i+2] = blockhash[i];
+       for (uint i = 0u; i < 8u; i = i + 1u) {
+               m[i+2u] = blockhash[i];
         }
  
         // twelve rounds of mixing
-       for(i=0;i<12;i++) {
-               B2B_G(0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
-               B2B_G(2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
-               B2B_G(4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
-               B2B_G(6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
-               B2B_G(0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
-               B2B_G(2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
-               B2B_G(4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
-               B2B_G(6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
+       for(uint i = 0u; i < 12u; i = i + 1u) {
+               B2B_G(0u, 8u, 16u, 24u, SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u]);
+               B2B_G(2u, 10u, 18u, 26u, SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u]);
+               B2B_G(4u, 12u, 20u, 28u, SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u]);
+               B2B_G(6u, 14u, 22u, 30u, SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u]);
+               B2B_G(0u, 10u, 20u, 30u, SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u]);
+               B2B_G(2u, 12u, 22u, 24u, SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u]);
+               B2B_G(4u, 14u, 16u, 26u, SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u]);
+               B2B_G(6u, 8u, 18u, 28u, SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u]);
         }
  
         // Pixel data is multiplied by threshold test result (0 or 1)
         // First 4 bytes insignificant, only calculate digest of second 4 bytes
-       if ((BLAKE2B_IV32_1 ^ v[1] ^ v[17]) > threshold) {
+       if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold) {
                 fragColor = vec4(
-                       float(x_index + 1u)/255., // +1 to distinguish from 0 (unsuccessful) pixels
-                       float(y_index + 1u)/255., // Same as previous
-                       float(x_pos)/255., // Return the 2 custom bytes used in work value
-                       float(y_pos)/255.  // Second custom byte
+                       float(x_index + 1u)/255.0, // +1 to distinguish from 0 (unsuccessful) pixels
+                       float(y_index + 1u)/255.0, // Same as previous
+                       float(x_pos)/255.0, // Return the 2 custom bytes used in work value
+                       float(y_pos)/255.0  // Second custom byte
                 );
         } else {
                 discard;
@@ -341,7 +341,9 @@ void main() {
  
                 // Draw output until success or progressCallback says to stop
                 const work = new Uint8Array(8)
+               let start: DOMHighResTimeStamp
                 const draw = (): void => {
+                       start = performance.now()
                         if (Pow.#gl == null) throw new Error('WebGL 2 is required')
                         if (Pow.#query == null) throw new Error('WebGL 2 is required to run queries')
                         Pow.#gl.clear(Pow.#gl.COLOR_BUFFER_BIT)
@@ -362,16 +364,20 @@ void main() {
                 function checkQueryResult () {
                         if (Pow.#gl == null) throw new Error('WebGL 2 is required to check query results')
                         if (Pow.#query == null) throw new Error('Query not found')
+                       console.log(`checking (${performance.now() - start} ms)`)
                         if (Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT_AVAILABLE)) {
+                               console.log(`AVAILABLE (${performance.now() - start} ms)`)
                                 const anySamplesPassed = Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT)
                                 if (anySamplesPassed) {
                                         // A valid nonce was found
                                         readBackResult()
                                 } else {
+                                       console.log(`not found (${performance.now() - start} ms)`)
                                         // No valid nonce found, start the next draw call
                                         requestAnimationFrame(draw)
                                 }
                         } else {
+                               console.log(`not ready (${performance.now() - start} ms)`)
                                 // Query result not yet available, check again in the next frame
                                 requestAnimationFrame(checkQueryResult)
                         }
@@ -382,6 +388,7 @@ void main() {
                         // Check the pixels for any success
                         for (let i = 0; i < Pow.#pixels.length; i += 4) {
                                 if (Pow.#pixels[i] !== 0) {
+                                       console.log(`FOUND (${performance.now() - start} ms)`)
                                         const hex = Pow.#hexify(work.subarray(4, 8)) + Pow.#hexify([
                                                 Pow.#pixels[i + 2],
                                                 Pow.#pixels[i + 3],
author	Chris Duncan <chris@zoso.dev>
	Mon, 30 Dec 2024 22:35:19 +0000 (14:35 -0800)
committer	Chris Duncan <chris@zoso.dev>
	Mon, 30 Dec 2024 22:35:19 +0000 (14:35 -0800)