Expand documentation. Remove unused variables.

author Chris Duncan <chris@zoso.dev>

Tue, 4 Feb 2025 21:36:28 +0000 (13:36 -0800)

committer Chris Duncan <chris@zoso.dev>

Tue, 4 Feb 2025 21:36:28 +0000 (13:36 -0800)
author Chris Duncan <chris@zoso.dev>
Tue, 4 Feb 2025 21:36:28 +0000 (13:36 -0800)
committer Chris Duncan <chris@zoso.dev>
Tue, 4 Feb 2025 21:36:28 +0000 (13:36 -0800)
diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl

index da0dd968ca8c79ed470df7dc6810bf2003dbb485..c07ce350855132278807bc02b81f5231cbb90355 100644 (file)
--- a/src/shaders/compute.wgsl
+++ b/src/shaders/compute.wgsl
@@ -112,7 +112,13 @@ fn main(id: vec3<u32>) {
         * Twelve rounds of G mixing as part of BLAKE2b compression step. Normally,
         * each round is divided into eight subprocesses; NanoPow compresses these
         * operations into four subprocesses by executing sequential pairs
-       * simultaneously, inspired by https://github.com/minio/blake2b-simd
+       * simultaneously, inspired by https://github.com/minio/blake2b-simd. It then
+       * executes each compressed statement in pairs so that the compiler can
+       * interleave independent instructions and improve scheduling. That is to say,
+       * to execute `a = a + b` for subprocesses 1-4, first 1 is paired with 2 and 3
+       * is paired with 4; then 1/2 is executed and 3/4 is executed; then the next
+       * computation `a = a + m[sigma[r][2*i+0]]` is executed in the same manner, and
+       * so on through all the steps of the subprocess.
         *
         * Each subprocess applies transformations to `m` and `v` variables based on
         * a defined set of index inputs. The algorithm for each subprocess is defined
@@ -138,12 +144,6 @@ fn main(id: vec3<u32>) {
         * Each sum step has an extra carry addition. Note that the m[sigma] sum is
         * skipped if m[sigma] is zero since it effectively does nothing.
         */
-       var a: vec4<u32>;
-       var b: vec4<u32>;
-       var c: vec4<u32>;
-       var d: vec4<u32>;
-       var x: vec4<u32>;
-       var y: vec4<u32>;
         var v56: vec4<u32>;
         var vFC: vec4<u32>;
         var v74: vec4<u32>;
author	Chris Duncan <chris@zoso.dev>
	Tue, 4 Feb 2025 21:36:28 +0000 (13:36 -0800)
committer	Chris Duncan <chris@zoso.dev>
	Tue, 4 Feb 2025 21:36:28 +0000 (13:36 -0800)