From: Chris Duncan
Date: Wed, 5 Feb 2025 14:18:33 +0000 (-0800)
Subject: Found the culprit. Atomic exchange is actually 40ms slower than atomic load, so rever...
X-Git-Tag: v3.0.0~23
X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=91e7ce7f9005fc381f946bb38cd7360de5a509fa;p=nano-pow.git

Found the culprit. Atomic exchange is actually 40ms slower than atomic load, so revert to conditional load-and-store. Makes sense, it's doing two operations.
---

diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl
index 6f2a29a..d6d01f1 100644
--- a/src/shaders/compute.wgsl
+++ b/src/shaders/compute.wgsl
@@ -1456,7 +1456,8 @@ fn main(id: vec3<u32>) {
   * Set nonce if it passes the threshold and no other thread has set it.
   * Only high bits are needed for comparison since threshold low bits are zero.
   */
-  if ((BLAKE2B_IV_0.y ^ v0.y ^ v8.y) >= ubo.threshold && atomicExchange(&work.found, 1u) == 0u) {
+  if ((BLAKE2B_IV32_1 ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
+    atomicStore(&work.found, 1u);
     work.nonce = m0;
   }
   return;