From 2686d5a651c8e0955924bbd7b10c7e493bf7a5b4 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Wed, 15 Jan 2025 19:14:48 -0800 Subject: [PATCH] Split main into separate entry points to fix false positives from validate. --- src/classes/gpu.ts | 34 +++++++++++++++++++++++----------- src/shaders/compute.wgsl | 21 +++++++++++++++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/classes/gpu.ts b/src/classes/gpu.ts index 3a69199..5513ff7 100644 --- a/src/classes/gpu.ts +++ b/src/classes/gpu.ts @@ -16,7 +16,8 @@ export class NanoPowGpu { static #gpuBuffer: GPUBuffer static #cpuBuffer: GPUBuffer static #bindGroupLayout: GPUBindGroupLayout - static #pipeline: GPUComputePipeline + static #searchPipeline: GPUComputePipeline + static #validatePipeline: GPUComputePipeline // Initialize WebGPU static async init (): Promise { @@ -69,16 +70,27 @@ export class NanoPowGpu { } ] }) + const shaderModule = this.#device.createShaderModule({ + code: NanoPowGpuComputeShader + }) + // Create pipeline to connect compute shader to binding layout + this.#searchPipeline = this.#device.createComputePipeline({ + layout: this.#device.createPipelineLayout({ + bindGroupLayouts: [this.#bindGroupLayout] + }), + compute: { + entryPoint: 'search', + module: shaderModule + } + }) // Create pipeline to connect compute shader to binding layout - this.#pipeline = this.#device.createComputePipeline({ + this.#validatePipeline = this.#device.createComputePipeline({ layout: this.#device.createPipelineLayout({ bindGroupLayouts: [this.#bindGroupLayout] }), compute: { - entryPoint: 'main', - module: this.#device.createShaderModule({ - code: NanoPowGpuComputeShader - }) + entryPoint: 'validate', + module: shaderModule } }) } @@ -117,7 +129,7 @@ export class NanoPowGpu { console.table(averages) } - static async #dispatch (seed: bigint, hash: string, threshold: number, passes: number): Promise { + static async #dispatch (pipeline: GPUComputePipeline, seed: bigint, hash: string, threshold: 
number, passes: number): Promise { if (this.#device == null) throw new Error(`WebGPU device failed to load.`) // Set up uniform buffer object // Note: u32 size is 4, but total alignment must be multiple of 16 @@ -157,7 +169,7 @@ export class NanoPowGpu { const passEncoder = commandEncoder.beginComputePass() // Issue commands and end compute pass structure - passEncoder.setPipeline(this.#pipeline) + passEncoder.setPipeline(pipeline) passEncoder.setBindGroup(0, bindGroup) passEncoder.dispatchWorkgroups(passes, passes) passEncoder.end() @@ -177,7 +189,7 @@ export class NanoPowGpu { this.#cpuBuffer.unmap() } catch (err) { console.warn(`Error getting data from GPU. ${err}`) - return this.#dispatch(seed, hash, threshold, passes) + return this.#dispatch(pipeline, seed, hash, threshold, passes) } if (data == null) throw new Error(`Failed to get data from buffer.`) return data @@ -224,7 +236,7 @@ export class NanoPowGpu { start = performance.now() const random = Math.floor(Math.random() * 0xffffffff) const seed = (BigInt(random) << 32n) | BigInt(random) - const data = await this.#dispatch(seed, hash, threshold, effort * 0x100) + const data = await this.#dispatch(this.#searchPipeline, seed, hash, threshold, effort * 0x100) nonce = data.getBigUint64(0, true) this.#busy = !data.getUint32(8) times.push(performance.now() - start) @@ -267,7 +279,7 @@ export class NanoPowGpu { if (this.#device == null) throw new Error(`WebGPU device failed to load.`) const seed = BigInt(`0x${work}`) - const data = await this.#dispatch(seed, hash, threshold, 1) + const data = await this.#dispatch(this.#validatePipeline, seed, hash, threshold, 1) const nonce = data.getBigUint64(0, true).toString(16).padStart(16, '0') const found = !!data.getUint32(8) this.#busy = false diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 01125aa..406007b 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -20,14 +20,31 @@ struct WORK { */ const BLAKE2B_IV32_1: u32 = 0x6A09E667u; 
+/**
+* Search compute function
+* Calls main with a workgroup size of 64 which has been tested as optimal
+*/
+@compute @workgroup_size(64)
+fn search(@builtin(global_invocation_id) global_id: vec3<u32>) {
+	main(global_id);
+}
+
+/**
+* Validate compute function
+* Calls main with a workgroup size of 1 so that only one value is tested
+*/
+@compute @workgroup_size(1)
+fn validate(@builtin(global_invocation_id) global_id: vec3<u32>) {
+	main(global_id);
+}
+
 /**
 * Main compute function
 * A random u32 provided by the UBO is copied to form a pair. Each component of
 * this 8-byte value is then XOR'd with a different dimensional index from
 * the thread identifier.
 */
-@compute @workgroup_size(64)
-fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+fn main(id: vec3<u32>) {
 	if (atomicLoad(&work.found) != 0u) { return; }
 
 	let threshold: u32 = ubo.threshold;
-- 
2.34.1