From 822c24e112da41522421f8b91e2e5ae92fb98622 Mon Sep 17 00:00:00 2001
From: Chris Duncan
Date: Sun, 5 Jan 2025 00:00:31 -0800
Subject: [PATCH] Scrap in-shader loop since it is probably triggering browser
 watchdog timers as a long-running process on lower-end hardware, just
 redispatch instead.

---
 src/lib/workers/powgpu.ts | 60 +++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts
index 7c5b826..920e090 100644
--- a/src/lib/workers/powgpu.ts
+++ b/src/lib/workers/powgpu.ts
@@ -53,8 +53,8 @@ export class PowGpu extends WorkerInterface {
 		@group(0) @binding(0) var<uniform> ubo: UBO;
 
 		struct WORK {
-			nonce: vec2<u32>,
-			found: atomic<u32>
+			found: atomic<u32>,
+			nonce: vec2<u32>
 		};
 		@group(0) @binding(1) var<storage, read_write> work: WORK;
 
@@ -145,7 +145,7 @@ export class PowGpu extends WorkerInterface {
 		 * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
 		 * UBO. High 4 bytes are the random value XOR'd with index of each thread.
 		 */
-		@compute @workgroup_size(64)
+		@compute @workgroup_size(256)
 		fn main(
 			@builtin(workgroup_id) workgroup_id: vec3<u32>,
 			@builtin(local_invocation_id) local_id: vec3<u32>
@@ -197,32 +197,30 @@ export class PowGpu extends WorkerInterface {
 				0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
 			);
 
-			for (var i: u32 = 0u; i < 0xFFu; i = i << 1u) {
-				m[0u] = m[0u] ^ i;
-				/**
-				 * Twelve rounds of mixing as part of BLAKE2b compression step
-				 */
-				for (var r: u32 = 0u; r < 12u; r = r + 1u) {
-					G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]);
-					G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]);
-					G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]);
-					G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]);
-					G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]);
-					G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]);
-					G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]);
-					G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]);
-				}
+			/**
+			 * Twelve rounds of mixing as part of BLAKE2b compression step
+			 */
+			for (var r: u32 = 0u; r < 12u; r = r + 1u) {
+				G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]);
+				G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]);
+				G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]);
+				G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]);
+				G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]);
+				G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]);
+				G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]);
+				G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]);
+			}
 
-				/**
-				 * Set nonce if it passes the threshold and no other thread has set it
-				 */
-				if (atomicLoad(&work.found) == 0u && (BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
-					atomicStore(&work.found, 1u);
-					work.nonce.x = m[0];
-					work.nonce.y = m[1];
-					return;
-				}
+			/**
+			 * Set nonce if it passes the threshold and no other thread has set it
+			 */
+			if (atomicLoad(&work.found) == 0u && (BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
+				atomicStore(&work.found, 1u);
+				work.nonce.x = m[0];
+				work.nonce.y = m[1];
+				return;
 			}
+
 			/**
 			 * Nonce not found in this execution context
 			 */
@@ -327,8 +325,8 @@ export class PowGpu extends WorkerInterface {
 		uboView.setUint32(36, threshold, true)
 		PowGpu.#device.queue.writeBuffer(PowGpu.#uboBuffer, 0, uboView)
 
-		// Reset offset 8 `found` flag to 0u in WORK before each calculation
-		PowGpu.#device.queue.writeBuffer(PowGpu.#gpuBuffer, 8, new Uint32Array([0]))
+		// Reset `found` flag to 0u in WORK before each calculation
+		PowGpu.#device.queue.writeBuffer(PowGpu.#gpuBuffer, 0, new Uint32Array([0]))
 
 		// Bind UBO read and GPU write buffers
 		const bindGroup = PowGpu.#device.createBindGroup({
@@ -375,8 +373,8 @@ export class PowGpu extends WorkerInterface {
 		await PowGpu.#cpuBuffer.mapAsync(GPUMapMode.READ)
 		await PowGpu.#device.queue.onSubmittedWorkDone()
 		const data = new DataView(PowGpu.#cpuBuffer.getMappedRange())
-		const nonce = data.getBigUint64(0, true)
-		const found = !!data.getUint32(8)
+		const found = !!data.getUint32(0)
+		const nonce = data.getBigUint64(8, true)
 		PowGpu.#cpuBuffer.unmap()
 
 		if (found) {
-- 
2.34.1