From 91e7ce7f9005fc381f946bb38cd7360de5a509fa Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Wed, 5 Feb 2025 06:18:33 -0800 Subject: [PATCH] Found the culprit. Atomic exchange is actually 40ms slower than atomic load, so revert to conditional load-and-store. Makes sense, it's doing two operations. --- src/shaders/compute.wgsl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 6f2a29a..d6d01f1 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -1456,7 +1456,8 @@ fn main(id: vec3<u32>) { * Set nonce if it passes the threshold and no other thread has set it. * Only high bits are needed for comparison since threshold low bits are zero. */ - if ((BLAKE2B_IV_0.y ^ v0.y ^ v8.y) >= ubo.threshold && atomicExchange(&work.found, 1u) == 0u) { + if ((BLAKE2B_IV32_1 ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) { + atomicStore(&work.found, 1u); work.nonce = m0; } return; -- 2.34.1