From: Chris Duncan <chris@zoso.dev>
Date: Tue, 21 Jan 2025 17:26:35 +0000 (-0800)
Subject: Initialize digest constant as vec2 and initialize v0 from it. Update doc comments.
X-Git-Tag: v3.0.0~79
X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=331383cd1842f9584972b6dacee904c7570c5cec;p=nano-pow.git

Initialize digest constant as vec2 and initialize v0 from it. Update doc comments.
---

diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl
index d441379..928ada3 100644
--- a/src/shaders/compute.wgsl
+++ b/src/shaders/compute.wgsl
@@ -15,10 +15,10 @@ struct WORK {
 @group(0) @binding(1) var<storage, read_write> work: WORK;
 
 /**
-* Defined separately from uint v[32] below as the original value is required
-* to calculate the second uint32 of the digest for threshold comparison
+* Defined separately from `v0` because the original value is required to
+* calculate the digest and compare it to the threshold.
 */
-const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+const BLAKE2B_IV32_0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u);
 
 /**
 * Used to rotate bits by a fixed amount during G mixing.
@@ -75,22 +75,22 @@ fn main(id: vec3<u32>) {
 	* Compression buffer initialized to 2 instances of initialization vector. Each
 	* vec2<u32> represents two halves of the original u64 value from the reference
 	* implementation. They appear reversed pairwise as defined below, but this is
-	* because NanoPow treats the `x` component of a vector as the low bits and the
-	* `y` component as the high bits.
+	* only an artifact of little-endian ordering: the `x` component holds the low
+	* 32 bits and the `y` component holds the high 32 bits, so the two components
+	* laid out in order match the little-endian 64-bit representation.
 	*
 	* The following values have been modified from the BLAKE2B_IV:
 	*
 	* OUTLEN is constant 8 bytes
-	* v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+	* v0.x ^= 0x01010000u ^ u32(OUTLEN);
 	*
 	* INLEN is constant 40 bytes: work value (8) + block hash (32)
-	* v[24u] ^= uint(INLEN);
+	* v12.x ^= u32(INLEN);
 	*
 	* It is always the "last" compression at this INLEN
-	* v[28u] = ~v[28u];
-	* v[29u] = ~v[29u];
+	* v14 = ~v14;
 	*/
-	var v0: vec2<u32> = vec2(0xF2BDC900u, 0x6A09E667u);
+	var v0: vec2<u32> = BLAKE2B_IV32_0;
 	var v1: vec2<u32> = vec2(0x84CAA73Bu, 0xBB67AE85u);
 	var v2: vec2<u32> = vec2(0xFE94F82Bu, 0x3C6EF372u);
 	var v3: vec2<u32> = vec2(0x5F1D36F1u, 0xA54FF53Au);
@@ -130,8 +130,8 @@ fn main(id: vec3<u32>) {
 	* 	c = c + d
 	* 	b = rotr64(b ^ c, 63)
 	*
-	* Each sum step has an extra carry addition. Note that the `a` assignment sum
-	* has zero, one, or two carry additions depending on whether m[sigma] is zero.
+	* Each sum step has an extra carry addition. Note that the m[sigma] addition
+	* is skipped when m[sigma] is zero, since adding zero would change nothing.
 	*/
 
 	/****************************************************************************
@@ -1432,7 +1432,7 @@ fn main(id: vec3<u32>) {
 	/**
 	* Set nonce if it passes the threshold and no other thread has set it
 	*/
-	if ((BLAKE2B_IV32_1 ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
+	if ((BLAKE2B_IV32_0.y ^ v0.y ^ v8.y) > threshold && atomicLoad(&work.found) == 0u) {
 		atomicStore(&work.found, 1u);
 		work.nonce = m0;
 	}