From 7212404bc67a58587e384f4525ab5e4509fe677b Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 07:08:19 -0800 Subject: [PATCH] Replace more scalar addition with vector addition. --- src/shaders/compute.wgsl | 337 +++++++++++++++++++++++++-------------- 1 file changed, 216 insertions(+), 121 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index e59c9b7..8dc9871 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -137,7 +137,6 @@ fn main(@builtin(global_invocation_id) id: vec3) { v8 = v_89.x; v9 = v_89.y; - // a = a + m[sigma[r][2*i+0]] o0 = v0 + m0; o1 = v1 + m1; @@ -242,11 +241,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -279,11 +282,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -901,11 +908,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -941,11 +952,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -1557,11 +1572,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -1594,11 +1613,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -2207,11 +2230,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -2244,11 +2271,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -2860,11 +2891,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -2900,11 +2935,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -3513,11 +3552,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -3553,11 +3596,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -4166,11 +4213,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -4206,11 +4257,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -4822,11 +4877,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -4862,11 +4921,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -5475,11 +5538,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -5515,11 +5582,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -6125,11 +6196,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -6162,11 +6237,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -6772,11 +6851,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -6809,11 +6892,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; @@ -7431,11 +7518,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v18; @@ -7471,11 +7562,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v18 + v26; - o1 = v19 + v27; - o1 = o1 + select(0u, 1u, o0 < v18); - v18 = o0; - v19 = o1; + v_1819.x = v18; + v_1819.y = v19; + v_2627.x = v26; + v_2627.y = v27; + v_1819 = v_1819 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1819.x + v_2627.x < v_1819.x); + v18 = v_1819.x; + v19 = v_1819.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v18; -- 2.34.1