zoso.dev Git - nano-pow.git/commitdiff
Increase max workgroup size and remove a dispatch dimension in order to compute more...
author Chris Duncan <chris@zoso.dev>
Fri, 10 Jan 2025 16:16:34 +0000 (08:16 -0800)
committer Chris Duncan <chris@zoso.dev>
Fri, 10 Jan 2025 18:28:07 +0000 (10:28 -0800)
src/classes/gpu.ts
src/shaders/gpu-compute.ts

index 89cb9a0e0e7c043c730a3086c8d4795c1417901f..a09a11c053366c5695ee9c3112421970c35430af 100644 (file)
@@ -173,7 +173,7 @@ export class NanoPowGpu {
                        // Issue commands and end compute pass structure
                        passEncoder.setPipeline(this.#pipeline)
                        passEncoder.setBindGroup(0, bindGroup)
-                       passEncoder.dispatchWorkgroups(256, 256, 256)
+                       passEncoder.dispatchWorkgroups(0xff, 0xff)
                        passEncoder.end()
 
                        // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
index 784560b80de1b1dce82b03ad0a49cf592f85c990..4da88999c3aeb668cc38d153a6872850ade23302 100644 (file)
@@ -23,28 +23,18 @@ const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
 * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
 * UBO. High 4 bytes are the random value XOR'd with index of each thread.
 */
-@compute @workgroup_size(64)
-fn main(
-       @builtin(workgroup_id) workgroup_id: vec3<u32>,
-       @builtin(local_invocation_id) local_id: vec3<u32>
+@compute @workgroup_size(256)
+fn main(@builtin(global_invocation_id) id: vec3<u32>
 ) {
        if (atomicLoad(&work.found) != 0u) { return; }
 
        let threshold: u32 = ubo.threshold;
 
-       /**
-       * Flatten 3D workgroup and local identifiers into u32 for each thread
-       */
-       var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
-               ((workgroup_id.y & 0xFFu) << 16u) |
-               ((workgroup_id.z & 0xFFu) << 8u) |
-               (local_id.x & 0xFFu);
-
        /**
        * Initialize (nonce||blockhash) concatenation
        */
-       var m0: u32 = ubo.random;
-       var m1: u32 = ubo.random ^ id;
+       var m0: u32 = ubo.random ^ id.x;
+       var m1: u32 = ubo.random ^ id.y;
        var m2: u32 = ubo.blockhash[0u].x;
        var m3: u32 = ubo.blockhash[0u].y;
        var m4: u32 = ubo.blockhash[0u].z;