From 4e5a6a5365e27beb4bbc798643ba66a7a14b5a10 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Wed, 1 Jan 2025 02:08:33 -0800 Subject: [PATCH] Clarified limitations of workgroup dispatching and sizes and corrected how workload is determined and passed into the shader, including bytes available for each pass (hint: we will always need to pass in at least some crypto bytes due to the sheer size of the computation, e.g. 256x256 is 65k nonces, which covers only 2 of the 8 bytes required). Started filling out the actual hashing function. --- src/lib/workers/powgpu.ts | 78 ++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index 81bdc57..7897f26 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -4,7 +4,6 @@ // https://github.com/numtel/nano-webgl-pow /// import { WorkerInterface } from '../pool.js' -import powgl from './powgl.js' export class PowGpu extends WorkerInterface { static { @@ -42,19 +41,15 @@ export class PowGpu extends WorkerInterface { }) } - static #workDispatchSize: number = Math.max(1, Math.floor(navigator.hardwareConcurrency)) - static #workgroupSize: number = 256 // Must align with shader - static #workload: number = this.#workDispatchSize * this.#workgroupSize - // WebGPU Compute Shader static #shader = ` struct UBO { blockhash: array, 2>, - threshold: u32, - workload: u32 + rand: vec2, + threshold: u32 }; @group(0) @binding(0) var ubo: UBO; - @group(0) @binding(1) var work: array>; + @group(0) @binding(1) var work: u32; /** * Defined separately from uint v[32] below as the original value is required @@ -142,18 +137,18 @@ export class PowGpu extends WorkerInterface { /** * Main compute function */ - @compute @workgroup_size(${this.#workgroupSize}, ${this.#workgroupSize}) - fn main(@builtin(global_invocation_id) global_id: vec3) { - // Check bounds, may be unnecessary with proper dispatch size - if (global_id.x >= ubo.workload || global_id.y >= 
ubo.workload) { - return; - } - + @compute @workgroup_size(256) + fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(local_invocation_id) local_id: vec3 + ) { var m: array; - // Workgroup index defines work value for this calculation point - m[0u] = global_id.x; - m[1u] = global_id.y; + // 8-byte work is split into two 4-byte u32 + // First 3 bytes provided, last byte defined by this compute index + m[0u] = (ubo.rand.x << 8u) ^ workgroup_id.x; + m[1u] = (ubo.rand.y << 8u) ^ local_id.x; // Block hash m[2u] = ubo.blockhash[0u].x; @@ -200,10 +195,9 @@ export class PowGpu extends WorkerInterface { } // Store the result directly into work array - // if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0) { - work[global_id.x].x = 1u ^ (2u << 8u); - work[global_id.x].y = 3u ^ (4u << 8u); - // } + if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0u) { + work = global_id.x; + } } `; @@ -239,11 +233,11 @@ export class PowGpu extends WorkerInterface { // Create buffers for writing GPU calculations and reading from Javascript this.#gpuBuffer = this.#device.createBuffer({ - size: this.#workload, + size: 16, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC }) this.#cpuBuffer = this.#device.createBuffer({ - size: this.#workload, + size: 16, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ }) @@ -300,8 +294,10 @@ export class PowGpu extends WorkerInterface { const uint32 = hashHex.slice(i, i + 8) uboView.setUint32(i / 2, parseInt(uint32, 16)) } - uboView.setUint32(32, threshold, true) - uboView.setUint32(40, PowGpu.#workload, true) + const rand = crypto.getRandomValues(new Uint32Array(2)) + uboView.setUint32(32, rand[0], true) + uboView.setUint32(36, rand[1], true) + uboView.setUint32(40, threshold, true) const uboBuffer = PowGpu.#device.createBuffer({ size: uboView.byteLength, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, @@ -334,7 +330,8 @@ export class PowGpu extends WorkerInterface { 
// Issue commands and end render pass passEncoder.setPipeline(PowGpu.#pipeline) passEncoder.setBindGroup(0, bindGroup) - passEncoder.dispatchWorkgroups(PowGpu.#workDispatchSize) + // passEncoder.dispatchWorkgroups(256 * Math.max(1, navigator.hardwareConcurrency)) + passEncoder.dispatchWorkgroups(256) passEncoder.end() // Copy result from GPU buffer to CPU buffer @@ -343,7 +340,7 @@ export class PowGpu extends WorkerInterface { 0, PowGpu.#cpuBuffer, 0, - PowGpu.#workload + 16 ) // End frame by passing array of command buffers to command queue for execution @@ -351,29 +348,18 @@ export class PowGpu extends WorkerInterface { // Read results back to Javascript and then unmap buffer after reading await PowGpu.#cpuBuffer.mapAsync(GPUMapMode.READ) - const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()) + await PowGpu.#device.queue.onSubmittedWorkDone() + const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()).slice() PowGpu.#cpuBuffer.unmap() console.log(`result`) console.dir(result) - console.log(`result?`) - for (let i = 0; i < result.length; i += 2) { - const work = new Uint8Array([result[i], result[i + 1]]) - console.log(`result[${i}]: ${result[i]}`) - console.log(`result[${i + 1}]: ${result[i + 1]}`) - console.log(`work: ${work}`) - if (result[i] !== 0 || result[i + 1] !== 0) { - // const hex = PowGpu.#hexify(work.subarray(4, 8)) + PowGpu.#hexify([ - // result[i + 2], - // result[i + 3], - // work[2] ^ (result[i] - 1), - // work[3] ^ (result[i + 1] - 1) - // ]) - const hex = PowGpu.#hexify(work) - typeof callback === 'function' && callback(hex) - return - } + if (result[0] !== 0 || result[1] !== 0) { + const hex = PowGpu.#hexify([...result]) + typeof callback === 'function' && callback(hex) + return } + // No result found. Redraw requestAnimationFrame(() => this.#calculate(hashHex, callback, threshold)) } -- 2.34.1