Increase max workgroup size and remove a dispatch dimension in order to compute more...

author Chris Duncan <chris@zoso.dev>

Fri, 10 Jan 2025 16:16:34 +0000 (08:16 -0800)

committer Chris Duncan <chris@zoso.dev>

Fri, 10 Jan 2025 18:28:07 +0000 (10:28 -0800)
author Chris Duncan <chris@zoso.dev>
Fri, 10 Jan 2025 16:16:34 +0000 (08:16 -0800)
committer Chris Duncan <chris@zoso.dev>
Fri, 10 Jan 2025 18:28:07 +0000 (10:28 -0800)
diff --git a/src/classes/gpu.ts b/src/classes/gpu.ts

index 89cb9a0e0e7c043c730a3086c8d4795c1417901f..a09a11c053366c5695ee9c3112421970c35430af 100644 (file)
--- a/src/classes/gpu.ts
+++ b/src/classes/gpu.ts
@@ -173,7 +173,7 @@ export class NanoPowGpu {
                         // Issue commands and end compute pass structure
                         passEncoder.setPipeline(this.#pipeline)
                         passEncoder.setBindGroup(0, bindGroup)
-                       passEncoder.dispatchWorkgroups(256, 256, 256)
+                       passEncoder.dispatchWorkgroups(0xff, 0xff)
                         passEncoder.end()
  
                         // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
diff --git a/src/shaders/gpu-compute.ts b/src/shaders/gpu-compute.ts

index 784560b80de1b1dce82b03ad0a49cf592f85c990..4da88999c3aeb668cc38d153a6872850ade23302 100644 (file)
--- a/src/shaders/gpu-compute.ts
+++ b/src/shaders/gpu-compute.ts
@@ -23,28 +23,18 @@ const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
  * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
  * UBO. High 4 bytes are the random value XOR'd with index of each thread.
  */
-@compute @workgroup_size(64)
-fn main(
-       @builtin(workgroup_id) workgroup_id: vec3<u32>,
-       @builtin(local_invocation_id) local_id: vec3<u32>
+@compute @workgroup_size(256)
+fn main(@builtin(global_invocation_id) id: vec3<u32>
  ) {
         if (atomicLoad(&work.found) != 0u) { return; }
  
         let threshold: u32 = ubo.threshold;
  
-       /**
-       * Flatten 3D workgroup and local identifiers into u32 for each thread
-       */
-       var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
-               ((workgroup_id.y & 0xFFu) << 16u) |
-               ((workgroup_id.z & 0xFFu) << 8u) |
-               (local_id.x & 0xFFu);
-
         /**
         * Initialize (nonce||blockhash) concatenation
         */
-       var m0: u32 = ubo.random;
-       var m1: u32 = ubo.random ^ id;
+       var m0: u32 = ubo.random ^ id.x;
+       var m1: u32 = ubo.random ^ id.y;
         var m2: u32 = ubo.blockhash[0u].x;
         var m3: u32 = ubo.blockhash[0u].y;
         var m4: u32 = ubo.blockhash[0u].z;
author	Chris Duncan <chris@zoso.dev>
	Fri, 10 Jan 2025 16:16:34 +0000 (08:16 -0800)
committer	Chris Duncan <chris@zoso.dev>
	Fri, 10 Jan 2025 18:28:07 +0000 (10:28 -0800)
src/classes/gpu.ts		patch | blob | history
src/shaders/gpu-compute.ts		patch | blob | history