Initialize digest constant as vec2 and initialize v0 from it. Update doc comments.

author Chris Duncan <chris@zoso.dev>

Tue, 21 Jan 2025 17:26:35 +0000 (09:26 -0800)

committer Chris Duncan <chris@zoso.dev>

Tue, 21 Jan 2025 17:26:35 +0000 (09:26 -0800)
author Chris Duncan <chris@zoso.dev>
Tue, 21 Jan 2025 17:26:35 +0000 (09:26 -0800)
committer Chris Duncan <chris@zoso.dev>
Tue, 21 Jan 2025 17:26:35 +0000 (09:26 -0800)
diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl

index d441379a2a1cf8f0a7b43bdfd81e7263c894adcc..928ada3d930457bfb59df7f6bf6ccbd4a210ac95 100644 (file)
--- a/src/shaders/compute.wgsl
+++ b/src/shaders/compute.wgsl
@@ -15,10 +15,10 @@ struct WORK {
  @group(0) @binding(1) var<storage, read_write> work: WORK;
  
  /**
-* Defined separately from uint v[32] below as the original value is required
-* to calculate the second uint32 of the digest for threshold comparison
+* Defined separately from `v0` because the original value is required to
+* calculate the digest and compare it to the threshold.
  */
-const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+const BLAKE2B_IV32_0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u);
  
  /**
  * Used to rotate bits by a fixed amount during G mixing.
@@ -75,22 +75,22 @@ fn main(id: vec3<u32>) {
         * Compression buffer initialized to 2 instances of initialization vector. Each
         * vec2<u32> represents two halves of the original u64 value from the reference
         * implementation. They appear reversed pairwise as defined below, but this is
-       * because NanoPow treats the `x` component of a vector as the low bits and the
-       * `y` component as the high bits.
+       * an illusion due to endianness: the `x` component of the vector is the low
+       * bits and the `y` component is the high bits, and if you laid the bits out
+       * individually, they would match the little-endian 64-bit representation.
         *
         * The following values have been modified from the BLAKE2B_IV:
         *
         * OUTLEN is constant 8 bytes
-       * v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+       * v0.x ^= 0x01010000u ^ u32(OUTLEN);
         *
         * INLEN is constant 40 bytes: work value (8) + block hash (32)
-       * v[24u] ^= uint(INLEN);
+       * v12.x ^= u32(INLEN);
         *
         * It is always the "last" compression at this INLEN
-       * v[28u] = ~v[28u];
-       * v[29u] = ~v[29u];
+       * v14 = ~v14;
         */
-       var v0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u);
+       var v0: vec2<u32> = BLAKE2B_IV32_0;
         var v1: vec2<u32> = vec2(0x84CAA73Bu, 0xBB67AE85u);
         var v2: vec2<u32> = vec2(0xFE94F82Bu, 0x3C6EF372u);
         var v3: vec2<u32> = vec2(0x5F1D36F1u, 0xA54FF53Au);
@@ -130,8 +130,8 @@ fn main(id: vec3<u32>) {
         *       c = c + d
         *       b = rotr64(b ^ c, 63)
         *
-       * Each sum step has an extra carry addition. Note that the `a` assignment sum
-       * has zero, one, or two carry additions depending on whether m[sigma] is zero.
+       * Each sum step has an extra carry addition. Note that the m[sigma] sum is
+       * skipped if m[sigma] is zero since it effectively does nothing.
         */
  
         /****************************************************************************
@@ -1432,7 +1432,7 @@ fn main(id: vec3<u32>) {
         /**
         * Set nonce if it passes the threshold and no other thread has set it
         */
-       if ((BLAKE2B_IV32_1 ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
+       if ((BLAKE2B_IV32_0.y ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
                 atomicStore(&work.found, 1u);
                 work.nonce = m0;
         }
author	Chris Duncan <chris@zoso.dev>
	Tue, 21 Jan 2025 17:26:35 +0000 (09:26 -0800)
committer	Chris Duncan <chris@zoso.dev>
	Tue, 21 Jan 2025 17:26:35 +0000 (09:26 -0800)