From 874a1a7813dcc3c34301359e4c3c0e0d731a2c70 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Wed, 8 Jan 2025 08:35:46 -0800 Subject: [PATCH] Inline G mix 0 of round 0. --- src/lib/nano-pow/shaders/gpu-compute.ts | 82 ++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/src/lib/nano-pow/shaders/gpu-compute.ts b/src/lib/nano-pow/shaders/gpu-compute.ts index 7081a34..5d839f1 100644 --- a/src/lib/nano-pow/shaders/gpu-compute.ts +++ b/src/lib/nano-pow/shaders/gpu-compute.ts @@ -209,8 +209,88 @@ fn main( * ROUND 0.1 * G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3); */ + // a = a + b; [64-bit add on u32 pairs: a is (v0 low, v1 high), b is (v8 low, v9 high); the if adds a carry to o1 when the low-word sum wraps past 0xFFFFFFFF] + o0 = v0 + v8; + o1 = v1 + v9; + if (v0 > 0xFFFFFFFFu - v8) { + o1 = o1 + 1u; + } + v0 = o0; + v1 = o1; + + // a = a + m[sigma[r][2*i+0]]; [the message word here is the pair (m0 low, m1 high); same carry handling] + o0 = v0 + m0; + o1 = v1 + m1; + if (v0 > 0xFFFFFFFFu - m0) { + o1 = o1 + 1u; + } + v0 = o0; + v1 = o1; + + // d = rotr64(d ^ a, 32); [d is (v24 low, v25 high); rotating 64 bits by 32 swaps the two u32 halves, so v24 takes xor1 and v25 takes xor0] + xor0 = v24 ^ v0; + xor1 = v25 ^ v1; + v24 = xor1; + v25 = xor0; + + // c = c + d; [c is (v16 low, v17 high); same carry handling] + o0 = v16 + v24; + o1 = v17 + v25; + if (v16 > 0xFFFFFFFFu - v24) { + o1 = o1 + 1u; + } + v16 = o0; + v17 = o1; + + // b = rotr64(b ^ c, 24); [each result word is its own XOR word shifted right by 24, combined with the other XOR word shifted left by 8; the two parts share no bits, so ^ merges them as | would] + xor0 = v8 ^ v16; + xor1 = v9 ^ v17; + v8 = (xor0 >> 24u) ^ (xor1 << 8u); + v9 = (xor1 >> 24u) ^ (xor0 << 8u); + + // a = a + b; [second a-plus-b step, using the b just produced by the 24-bit rotation] + o0 = v0 + v8; + o1 = v1 + v9; + if (v0 > 0xFFFFFFFFu - v8) { + o1 = o1 + 1u; + } + v0 = o0; + v1 = o1; + + // a = a + m[sigma[r][2*i+1]]; [the message word here is the pair (m2 low, m3 high); same carry handling] + o0 = v0 + m2; + o1 = v1 + m3; + if (v0 > 0xFFFFFFFFu - m2) { + o1 = o1 + 1u; + } + v0 = o0; + v1 = o1; + + // d = rotr64(d ^ a, 16); [each result word is its own XOR word shifted right by 16, combined with the other XOR word shifted left by 16] + xor0 = v24 ^ v0; + xor1 = v25 ^ v1; + v24 = (xor0 >> 16u) ^ (xor1 << 16u); + v25 = (xor1 >> 16u) ^ (xor0 << 16u); + + // c = c + d; [same carry handling as the earlier c-plus-d step] + o0 = v16 + v24; + o1 = v17 + v25; + if (v16 > 0xFFFFFFFFu - v24) { + o1 = o1 + 1u; + } + v16 = o0; + v17 = o1; + + // b = rotr64(b ^ c, 63); [same as rotating left by 1: each result word is one XOR word shifted left by 1, with the top bit of the other XOR word moved into bit 0] + xor0 = v8 ^ v16; + xor1 = v9 ^ v17; + v8 = (xor1 >> 31u) ^ (xor0 << 1u); + v9 = (xor0 >> 31u) ^ (xor1 << 1u); + + + + /* ROUND 0.2 */ - G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, 
&v25, m0, m1, m2, m3); G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7); G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u); G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); -- 2.34.1