From 2686d5a651c8e0955924bbd7b10c7e493bf7a5b4 Mon Sep 17 00:00:00 2001
From: Chris Duncan <chris@zoso.dev>
Date: Wed, 15 Jan 2025 19:14:48 -0800
Subject: [PATCH] Split main into separate entry points to fix false positives
 from validate.

---
 src/classes/gpu.ts       | 34 +++++++++++++++++++++++-----------
 src/shaders/compute.wgsl | 21 +++++++++++++++++++--
 2 files changed, 42 insertions(+), 13 deletions(-)
diff --git a/src/classes/gpu.ts b/src/classes/gpu.ts
index 3a69199..5513ff7 100644
--- a/src/classes/gpu.ts
+++ b/src/classes/gpu.ts
@@ -16,7 +16,8 @@ export class NanoPowGpu {
 	static #gpuBuffer: GPUBuffer
 	static #cpuBuffer: GPUBuffer
 	static #bindGroupLayout: GPUBindGroupLayout
-	static #pipeline: GPUComputePipeline
+	static #searchPipeline: GPUComputePipeline
+	static #validatePipeline: GPUComputePipeline
 
 	// Initialize WebGPU
 	static async init (): Promise<void> {
@@ -69,16 +70,27 @@ export class NanoPowGpu {
 				}
 			]
 		})
+		const shaderModule = this.#device.createShaderModule({
+			code: NanoPowGpuComputeShader
+		})
+		// Create pipeline connecting the 'search' entry point to the binding layout
+		this.#searchPipeline = this.#device.createComputePipeline({
+			layout: this.#device.createPipelineLayout({
+				bindGroupLayouts: [this.#bindGroupLayout]
+			}),
+			compute: {
+				entryPoint: 'search',
+				module: shaderModule
+			}
+		})
 		// Create pipeline to connect compute shader to binding layout
-		this.#pipeline = this.#device.createComputePipeline({
+		this.#validatePipeline = this.#device.createComputePipeline({
 			layout: this.#device.createPipelineLayout({
 				bindGroupLayouts: [this.#bindGroupLayout]
 			}),
 			compute: {
-				entryPoint: 'main',
-				module: this.#device.createShaderModule({
-					code: NanoPowGpuComputeShader
-				})
+				entryPoint: 'validate',
+				module: shaderModule
 			}
 		})
 	}
@@ -117,7 +129,7 @@ export class NanoPowGpu {
 		console.table(averages)
 	}
 
-	static async #dispatch (seed: bigint, hash: string, threshold: number, passes: number): Promise<DataView> {
+	static async #dispatch (pipeline: GPUComputePipeline, seed: bigint, hash: string, threshold: number, passes: number): Promise<DataView> {
 		if (this.#device == null) throw new Error(`WebGPU device failed to load.`)
 		// Set up uniform buffer object
 		// Note: u32 size is 4, but total alignment must be multiple of 16
@@ -157,7 +169,7 @@ export class NanoPowGpu {
 		const passEncoder = commandEncoder.beginComputePass()
 
 		// Issue commands and end compute pass structure
-		passEncoder.setPipeline(this.#pipeline)
+		passEncoder.setPipeline(pipeline)
 		passEncoder.setBindGroup(0, bindGroup)
 		passEncoder.dispatchWorkgroups(passes, passes)
 		passEncoder.end()
@@ -177,7 +189,7 @@ export class NanoPowGpu {
 			this.#cpuBuffer.unmap()
 		} catch (err) {
 			console.warn(`Error getting data from GPU. ${err}`)
-			return this.#dispatch(seed, hash, threshold, passes)
+			return this.#dispatch(pipeline, seed, hash, threshold, passes)
 		}
 		if (data == null) throw new Error(`Failed to get data from buffer.`)
 		return data
@@ -224,7 +236,7 @@ export class NanoPowGpu {
 			start = performance.now()
 			const random = Math.floor(Math.random() * 0xffffffff)
 			const seed = (BigInt(random) << 32n) | BigInt(random)
-			const data = await this.#dispatch(seed, hash, threshold, effort * 0x100)
+			const data = await this.#dispatch(this.#searchPipeline, seed, hash, threshold, effort * 0x100)
 			nonce = data.getBigUint64(0, true)
 			this.#busy = !data.getUint32(8)
 			times.push(performance.now() - start)
@@ -267,7 +279,7 @@ export class NanoPowGpu {
 		if (this.#device == null) throw new Error(`WebGPU device failed to load.`)
 
 		const seed = BigInt(`0x${work}`)
-		const data = await this.#dispatch(seed, hash, threshold, 1)
+		const data = await this.#dispatch(this.#validatePipeline, seed, hash, threshold, 1)
 		const nonce = data.getBigUint64(0, true).toString(16).padStart(16, '0')
 		const found = !!data.getUint32(8)
 		this.#busy = false
diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl
index 01125aa..406007b 100644
--- a/src/shaders/compute.wgsl
+++ b/src/shaders/compute.wgsl
@@ -20,14 +20,31 @@ struct WORK {
 */
 const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
 
+/**
+* Search compute entry point
+* Calls main with a workgroup size of 64, which has been tested as optimal
+*/
+@compute @workgroup_size(64)
+fn search(@builtin(global_invocation_id) global_id: vec3<u32>) {
+	main(global_id);
+}
+
+/**
+* Validate compute entry point
+* Calls main with a workgroup size of 1 so that only one value is tested when a single workgroup is dispatched
+*/
+@compute @workgroup_size(1)
+fn validate(@builtin(global_invocation_id) global_id: vec3<u32>) {
+	main(global_id);
+}
+
 /**
 * Main compute function
 * A random u32 provided by the UBO is copied to form a pair. Each component of
 * this 8-byte value is then XOR'd with a different dimensional index from
 * the thread identifier.
 */
-@compute @workgroup_size(64)
-fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+fn main(id: vec3<u32>) {
 	if (atomicLoad(&work.found) != 0u) { return; }
 
 	let threshold: u32 = ubo.threshold;
-- 
2.34.1