/**
* Read-write storage buffer at group 0, binding 1. The shader reads and sets
* its `found` flag through atomic operations and writes the result to `nonce`.
*/
@group(0) @binding(1) var<storage, read_write> work: WORK;
/**
-* Defined separately from uint v[32] below as the original value is required
-* to calculate the second uint32 of the digest for threshold comparison
+* Defined separately from `v0` because the original value is required to
+* calculate the digest and compare it to the threshold.
*/
-const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+const BLAKE2B_IV32_0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u);
/**
* Used to rotate bits by a fixed amount during G mixing.
* Compression buffer initialized to two instances of the initialization vector. Each
* vec2<u32> represents two halves of the original u64 value from the reference
* implementation. They appear reversed pairwise as defined below, but this is
- * because NanoPow treats the `x` component of a vector as the low bits and the
- * `y` component as the high bits.
+ * an illusion due to endianness: the `x` component of the vector is the low
+ * bits and the `y` component is the high bits, and if you laid the bits out
+ * individually, they would match the little-endian 64-bit representation.
*
* The following values have been modified from the BLAKE2B_IV:
*
* OUTLEN is constant 8 bytes
- * v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+ * v0.x ^= 0x01010000u ^ u32(OUTLEN);
*
* INLEN is constant 40 bytes: work value (8) + block hash (32)
- * v[24u] ^= uint(INLEN);
+ * v12.x ^= u32(INLEN);
*
* It is always the "last" compression at this INLEN
- * v[28u] = ~v[28u];
- * v[29u] = ~v[29u];
+ * v14 = ~v14;
*/
- var v0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u);
+ var v0: vec2<u32> = BLAKE2B_IV32_0;
var v1: vec2<u32> = vec2(0x84CAA73Bu, 0xBB67AE85u);
var v2: vec2<u32> = vec2(0xFE94F82Bu, 0x3C6EF372u);
var v3: vec2<u32> = vec2(0x5F1D36F1u, 0xA54FF53Au);
* c = c + d
* b = rotr64(b ^ c, 63)
*
- * Each sum step has an extra carry addition. Note that the `a` assignment sum
- * has zero, one, or two carry additions depending on whether m[sigma] is zero.
+ * Each sum step has an extra carry addition. Note that the m[sigma] sum is
+ * skipped if m[sigma] is zero, since adding zero would not change the result.
*/
/****************************************************************************
/**
* Set nonce if it passes the threshold and no other thread has set it.
*
* The value compared against `threshold` is the XOR of the high word of the
* original initialization vector with `v0.y` and `v8.y`.
*
* NOTE(review): the `atomicLoad` check and the `atomicStore` are two separate
* operations, so more than one invocation may observe `found == 0u` and then
* each write `work.nonce`. Confirm this is acceptable, or fold the check and
* the set into a single `atomicExchange(&work.found, 1u) == 0u`.
*/
- if ((BLAKE2B_IV32_1 ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
+ if ((BLAKE2B_IV32_0.y ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
atomicStore(&work.found, 1u);
work.nonce = m0;
}