From 955dcb6a209717c2912ecde552eb11c6678e7f23 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Wed, 8 Jan 2025 12:08:33 -0800 Subject: [PATCH] Inline G mix 1 of round 0. --- src/lib/nano-pow/shaders/gpu-compute.ts | 89 ++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/src/lib/nano-pow/shaders/gpu-compute.ts b/src/lib/nano-pow/shaders/gpu-compute.ts index f710de6..0fcf82e 100644 --- a/src/lib/nano-pow/shaders/gpu-compute.ts +++ b/src/lib/nano-pow/shaders/gpu-compute.ts @@ -289,9 +289,94 @@ fn main( - /* ROUND 0.2 */ + /** + * r=0, i=2, a=v[2-3], b=v[10-11], c=v[18-19], d=v[26-27] + */ + + // a = a + b; + o0 = v2 + v10; + o1 = v3 + v11; + if (v2 > 0xFFFFFFFFu - v10) { + o1 = o1 + 1u; + } + v2 = o0; + v3 = o1; + + // a = a + m[sigma[r][2*i+0]]; + o0 = v2 + m4; + o1 = v3 + m5; + if (v2 > 0xFFFFFFFFu - m4) { + o1 = o1 + 1u; + } + v2 = o0; + v3 = o1; + + // d = rotr64(d ^ a, 32); + xor0 = v26 ^ v2; + xor1 = v27 ^ v3; + v26 = xor1; + v27 = xor0; + + // c = c + d; + o0 = v18 + v26; + o1 = v19 + v27; + if (v18 > 0xFFFFFFFFu - v26) { + o1 = o1 + 1u; + } + v18 = o0; + v19 = o1; + + // b = rotr64(b ^ c, 24); + xor0 = v10 ^ v18; + xor1 = v11 ^ v19; + v10 = (xor0 >> 24u) ^ (xor1 << 8u); + v11 = (xor1 >> 24u) ^ (xor0 << 8u); + + // a = a + b; + o0 = v2 + v10; + o1 = v3 + v11; + if (v2 > 0xFFFFFFFFu - v10) { + o1 = o1 + 1u; + } + v2 = o0; + v3 = o1; + + // a = a + m[sigma[r][2*i+1]]; + o0 = v2 + m6; + o1 = v3 + m7; + if (v2 > 0xFFFFFFFFu - m6) { + o1 = o1 + 1u; + } + v2 = o0; + v3 = o1; + + // d = rotr64(d ^ a, 16) + xor0 = v26 ^ v2; + xor1 = v27 ^ v3; + v26 = (xor0 >> 16u) ^ (xor1 << 16u); + v27 = (xor1 >> 16u) ^ (xor0 << 16u); + + // c = c + d; + o0 = v18 + v26; + o1 = v19 + v27; + if (v18 > 0xFFFFFFFFu - v26) { + o1 = o1 + 1u; + } + v18 = o0; + v19 = o1; + + // b = rotr64(b ^ c, 63) + xor0 = v10 ^ v18; + xor1 = v11 ^ v19; + v10 = (xor1 >> 31u) ^ (xor0 << 1u); + v11 = (xor0 >> 31u) ^ (xor1 << 1u); + + + + /** + * ROUND 0.2 + */ - G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7); G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u); G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u); -- 2.34.1