From: Chris Duncan Date: Thu, 2 Jan 2025 16:33:37 +0000 (-0800) Subject: Fix invocation ID construction. Add "found" flag as an atomic. Remove randomized... X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=471ce80b402af779f1e35c1cd7b183e25dc0c672;p=libnemo.git Fix invocation ID construction. Add "found" flag as an atomic. Remove randomized value from Javascript since shader will just iterate nonces itself. Delete unused hexify function since we can just use built-in BigInt functions now. --- diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index 7d7774d..69759ab 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -45,11 +45,15 @@ export class PowGpu extends WorkerInterface { static #shader = ` struct UBO { blockhash: array, 2>, - rand: vec2, threshold: u32 }; @group(0) @binding(0) var ubo: UBO; - @group(0) @binding(1) var work: vec3; + + struct WORK { + nonce: vec2, + found: atomic + }; + @group(0) @binding(1) var work: WORK; /** * Defined separately from uint v[32] below as the original value is required @@ -141,14 +145,17 @@ export class PowGpu extends WorkerInterface { * Last 4 bytes are defined by index of each thread */ @compute @workgroup_size(256) - fn main(@builtin(global_invocation_id) global_id: vec3) { - work.x = 0u; - work.y = 0u; - work.z = 0u; - + fn main( + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(local_invocation_id) local_id: vec3 + ) { + var id: u32 = ((workgroup_id.x & 0xff) << 24) | + ((workgroup_id.y & 0xff) << 16) | + ((workgroup_id.z & 0xff) << 8) | + (local_id.x & 0xff); var m: array; m[0u] = 0u; - m[1u] = global_id.x; + m[1u] = id; m[2u] = ubo.blockhash[0u].x; m[3u] = ubo.blockhash[0u].y; m[4u] = ubo.blockhash[0u].z; @@ -159,7 +166,7 @@ export class PowGpu extends WorkerInterface { m[9u] = ubo.blockhash[1u].w; var i: u32 = 0u; - while (work.x == 0u) { + while (atomicLoad(&work.found) == 0u) { m[0u] = i; i = i + 1u; @@ -198,23 +205,16 @@ export class PowGpu extends WorkerInterface { } // Store the result directly into work array - if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) { - work.x = 1u; - work.y = m[0u]; - work.z = m[1u]; + if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0) { + atomicStore(&work.found, 1u); + work.nonce.x = i; + work.nonce.y = id; + return; } } } `; - static #hexify (arr: number[] | Uint8Array): string { - let out = '' - for (let i = arr.length - 1; i >= 0; i--) { - out += arr[i].toString(16).padStart(2, '0') - } - return out - } - // Initialize WebGPU static #device: GPUDevice | null = null static #gpuBuffer: GPUBuffer @@ -300,11 +300,7 @@ export class PowGpu extends WorkerInterface { const uint32 = hashHex.slice(i, i + 8) uboView.setUint32(i / 2, parseInt(uint32, 16)) } - const rand = crypto.getRandomValues(new Uint32Array(2)) - console.log(rand) - uboView.setUint32(32, rand[0], true) - uboView.setUint32(36, rand[1], true) - uboView.setUint32(40, threshold, true) + uboView.setUint32(32, threshold, true) const uboBuffer = PowGpu.#device.createBuffer({ size: uboView.byteLength, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, @@ -337,16 +333,16 @@ export class PowGpu extends WorkerInterface { // Issue commands and end render pass passEncoder.setPipeline(PowGpu.#pipeline) passEncoder.setBindGroup(0, bindGroup) - passEncoder.dispatchWorkgroups(256, 256, 256) + passEncoder.dispatchWorkgroups(256) passEncoder.end() - // Copy result from GPU buffer to CPU buffer + // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading commandEncoder.copyBufferToBuffer( PowGpu.#gpuBuffer, 0, PowGpu.#cpuBuffer, 0, - 16 + 12 ) // End frame by passing array of command buffers to command queue for execution @@ -355,11 +351,16 @@ export class PowGpu extends WorkerInterface { // Read results back to Javascript and then unmap buffer after reading await PowGpu.#cpuBuffer.mapAsync(GPUMapMode.READ) await PowGpu.#device.queue.onSubmittedWorkDone() - const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()).slice() + const data = new DataView(PowGpu.#cpuBuffer.getMappedRange()) + const nonce = data.getBigUint64(0, true) + const found = !!data.getUint32(8) + console.log(new Uint32Array(data.buffer)) PowGpu.#cpuBuffer.unmap() - if (result[0] !== 0) { - const hex = PowGpu.#hexify([result[1], result[2]]) + console.log(`found: ${found}`) + console.log(`nonce: ${nonce}`) + if (found) { + const hex = nonce.toString(16) typeof callback === 'function' && callback(hex) return }