]> zoso.dev Git - libnemo.git/commitdiff
Start experimenting with 4-byte assignment using workgroup size as 1 byte via local_i...
author Chris Duncan <chris@zoso.dev>
Thu, 2 Jan 2025 13:55:02 +0000 (05:55 -0800)
committer Chris Duncan <chris@zoso.dev>
Thu, 2 Jan 2025 13:55:02 +0000 (05:55 -0800)
src/lib/workers/powgpu.ts

index b27f31c4479c8eee072d92a875a616bb66db1303..7d7774db8c3de7afcf3a5433d4dca0e761eceef5 100644 (file)
@@ -142,11 +142,13 @@ export class PowGpu extends WorkerInterface {
                */
                @compute @workgroup_size(256)
                fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
-                       var m: array<u32, 16>;
-
-                       m[0u] = ubo.rand.x;
-                       m[1u] = (ubo.rand.y << 16u) ^ global_id.x;
+                       work.x = 0u;
+                       work.y = 0u;
+                       work.z = 0u;
 
+                       var m: array<u32, 16>;
+                       m[0u] = 0u;
+                       m[1u] = global_id.x;
                        m[2u] = ubo.blockhash[0u].x;
                        m[3u] = ubo.blockhash[0u].y;
                        m[4u] = ubo.blockhash[0u].z;
@@ -156,44 +158,51 @@ export class PowGpu extends WorkerInterface {
                        m[8u] = ubo.blockhash[1u].z;
                        m[9u] = ubo.blockhash[1u].w;
 
-                       /**
-                       * Compression buffer, intialized to 2 instances of the initialization vector
-                       * The following values have been modified from the BLAKE2B_IV:
-                       * OUTLEN is constant 8 bytes
-                       * v[0] ^= 0x01010000u ^ uint(OUTLEN);
-                       * INLEN is constant 40 bytes: work value (8) + block hash (32)
-                       * v[24] ^= uint(INLEN);
-                       * It's always the "last" compression at this INLEN
-                       * v[28] = ~v[28];
-                       * v[29] = ~v[29];
-                       */
-                       var v = array<u32, 32>(
-                               0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
-                               0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
-                               0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
-                               0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
-                               0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
-                               0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
-                               0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
-                               0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
-                       );
-
-                       // twelve rounds of mixing
-                       for (var i: u32 = 0u; i < 12u; i = i + 1u) {
-                               B2B_G(&v, &m, 0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
-                               B2B_G(&v, &m, 2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
-                               B2B_G(&v, &m, 4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
-                               B2B_G(&v, &m, 6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
-                               B2B_G(&v, &m, 0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
-                               B2B_G(&v, &m, 2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
-                               B2B_G(&v, &m, 4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
-                               B2B_G(&v, &m, 6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
-                       }
+                       var i: u32 = 0u;
+                       while (work.x == 0u) {
+                               m[0u] = i;
+                               i = i + 1u;
+
+                               /**
+                               * Compression buffer, initialized to 2 instances of the initialization vector
+                               * The following values have been modified from the BLAKE2B_IV:
+                               * OUTLEN is constant 8 bytes
+                               * v[0] ^= 0x01010000u ^ uint(OUTLEN);
+                               * INLEN is constant 40 bytes: work value (8) + block hash (32)
+                               * v[24] ^= uint(INLEN);
+                               * It's always the "last" compression at this INLEN
+                               * v[28] = ~v[28];
+                               * v[29] = ~v[29];
+                               */
+                               var v = array<u32, 32>(
+                                       0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+                                       0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+                                       0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+                                       0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+                                       0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+                                       0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+                                       0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+                                       0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+                               );
+
+                               // twelve rounds of mixing
+                               for (var i: u32 = 0u; i < 12u; i = i + 1u) {
+                                       B2B_G(&v, &m, 0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
+                                       B2B_G(&v, &m, 2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
+                                       B2B_G(&v, &m, 4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
+                                       B2B_G(&v, &m, 6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
+                                       B2B_G(&v, &m, 0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
+                                       B2B_G(&v, &m, 2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
+                                       B2B_G(&v, &m, 4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
+                                       B2B_G(&v, &m, 6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
+                               }
 
-                       // Store the result directly into work array
-                       if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0xffffffffu) {
-                               work.x = ubo.rand.x;
-                               work.y = (ubo.rand.y << 16u) ^ global_id.x;
+                               // Store the result directly into work array
+                               if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
+                                       work.x = 1u;
+                                       work.y = m[0u];
+                                       work.z = m[1u];
+                               }
                        }
                }
        `;
@@ -328,8 +337,7 @@ export class PowGpu extends WorkerInterface {
                // Issue commands and end render pass
                passEncoder.setPipeline(PowGpu.#pipeline)
                passEncoder.setBindGroup(0, bindGroup)
-               // passEncoder.dispatchWorkgroups(256 * Math.max(1, navigator.hardwareConcurrency))
-               passEncoder.dispatchWorkgroups(256)
+               passEncoder.dispatchWorkgroups(256, 256, 256)
                passEncoder.end()
 
                // Copy result from GPU buffer to CPU buffer
@@ -350,9 +358,8 @@ export class PowGpu extends WorkerInterface {
                const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()).slice()
                PowGpu.#cpuBuffer.unmap()
 
-               console.log(`result: ${[...result]}`)
-               if (result[0] !== 0 || result[1] !== 0) {
-                       const hex = PowGpu.#hexify([result[0], result[1]])
+               if (result[0] !== 0) {
+                       const hex = PowGpu.#hexify([result[1], result[2]])
                        typeof callback === 'function' && callback(hex)
                        return
                }