From 912097dd99f297f6dc06c1cac7982f42bea53620 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Fri, 10 Jan 2025 08:16:34 -0800 Subject: [PATCH] Increase max workgroup size and remove a dispatch dimension in order to compute more in parallel as well as simplify nonce construction. --- src/classes/gpu.ts | 2 +- src/shaders/gpu-compute.ts | 18 ++++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/classes/gpu.ts b/src/classes/gpu.ts index 89cb9a0..a09a11c 100644 --- a/src/classes/gpu.ts +++ b/src/classes/gpu.ts @@ -173,7 +173,7 @@ export class NanoPowGpu { // Issue commands and end compute pass structure passEncoder.setPipeline(this.#pipeline) passEncoder.setBindGroup(0, bindGroup) - passEncoder.dispatchWorkgroups(256, 256, 256) + passEncoder.dispatchWorkgroups(0xff, 0xff) passEncoder.end() // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading diff --git a/src/shaders/gpu-compute.ts b/src/shaders/gpu-compute.ts index 784560b..4da8899 100644 --- a/src/shaders/gpu-compute.ts +++ b/src/shaders/gpu-compute.ts @@ -23,28 +23,18 @@ const BLAKE2B_IV32_1: u32 = 0x6A09E667u; * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from * UBO. High 4 bytes are the random value XOR'd with index of each thread. 
*/ -@compute @workgroup_size(64) -fn main( - @builtin(workgroup_id) workgroup_id: vec3<u32>, - @builtin(local_invocation_id) local_id: vec3<u32> +@compute @workgroup_size(256) +fn main(@builtin(global_invocation_id) id: vec3<u32> ) { if (atomicLoad(&work.found) != 0u) { return; } let threshold: u32 = ubo.threshold; - /** - * Flatten 3D workgroup and local identifiers into u32 for each thread - */ - var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) | - ((workgroup_id.y & 0xFFu) << 16u) | - ((workgroup_id.z & 0xFFu) << 8u) | - (local_id.x & 0xFFu); - /** * Initialize (nonce||blockhash) concatenation */ - var m0: u32 = ubo.random; - var m1: u32 = ubo.random ^ id; + var m0: u32 = ubo.random ^ id.x; + var m1: u32 = ubo.random ^ id.y; var m2: u32 = ubo.blockhash[0u].x; var m3: u32 = ubo.blockhash[0u].y; var m4: u32 = ubo.blockhash[0u].z; -- 2.34.1