From: Chris Duncan Date: Tue, 21 Jan 2025 17:26:35 +0000 (-0800) Subject: Initialize digest constant as vec2 and initialize v0 from it. Update doc comments. X-Git-Tag: v3.0.0~79 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=331383cd1842f9584972b6dacee904c7570c5cec;p=nano-pow.git Initialize digest constant as vec2 and initialize v0 from it. Update doc comments. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index d441379..928ada3 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -15,10 +15,10 @@ struct WORK { @group(0) @binding(1) var<storage, read_write> work: WORK; /** -* Defined separately from uint v[32] below as the original value is required -* to calculate the second uint32 of the digest for threshold comparison +* Defined separately from `v0` because the original value is required to +* calculate the digest and compare it to the threshold. */ -const BLAKE2B_IV32_1: u32 = 0x6A09E667u; +const BLAKE2B_IV32_0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u); /** * Used to rotate bits by a fixed amount during G mixing. @@ -75,22 +75,22 @@ fn main(id: vec3<u32>) { * Compression buffer initialized to 2 instances of initialization vector. Each * vec2 represents two halves of the original u64 value from the reference * implementation. They appear reversed pairwise as defined below, but this is - * because NanoPow treats the `x` component of a vector as the low bits and the - * `y` component as the high bits. + * an illusion due to endianness: the `x` component of the vector is the low + * bits and the `y` component is the high bits, and if you laid the bits out + * individually, they would match the little-endian 64-bit representation.
* * The following values have been modified from the BLAKE2B_IV: * * OUTLEN is constant 8 bytes - * v[0u] ^= 0x01010000u ^ uint(OUTLEN); + * v0.x ^= 0x01010000u ^ u32(OUTLEN); * * INLEN is constant 40 bytes: work value (8) + block hash (32) - * v[24u] ^= uint(INLEN); + * v12.x ^= u32(INLEN); * * It is always the "last" compression at this INLEN - * v[28u] = ~v[28u]; - * v[29u] = ~v[29u]; + * v14 = ~v14; */ - var v0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u); + var v0: vec2<u32> = BLAKE2B_IV32_0; var v1: vec2<u32> = vec2(0x84CAA73Bu, 0xBB67AE85u); var v2: vec2<u32> = vec2(0xFE94F82Bu, 0x3C6EF372u); var v3: vec2<u32> = vec2(0x5F1D36F1u, 0xA54FF53Au); @@ -130,8 +130,8 @@ fn main(id: vec3<u32>) { * c = c + d * b = rotr64(b ^ c, 63) * - * Each sum step has an extra carry addition. Note that the `a` assignment sum - * has zero, one, or two carry additions depending on whether m[sigma] is zero. + * Each sum step has an extra carry addition. Note that the m[sigma] sum is + * skipped if m[sigma] is zero since it effectively does nothing. */ /**************************************************************************** @@ -1432,7 +1432,7 @@ fn main(id: vec3<u32>) { /** * Set nonce if it passes the threshold and no other thread has set it */ - if ((BLAKE2B_IV32_1 ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) { + if ((BLAKE2B_IV32_0.y ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) { atomicStore(&work.found, 1u); work.nonce = m0; }