From 255fdbdfbe0788b977c0e0b12d328434103ea3d8 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 08:26:17 -0800 Subject: [PATCH] Replace more scalar addition with vector addition. --- src/shaders/compute.wgsl | 336 +++++++++++++++++++++++++-------------- 1 file changed, 216 insertions(+), 120 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 3162f0a..ff73ed2 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -523,11 +523,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -563,11 +567,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -1233,11 +1241,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -1273,11 +1285,15 @@ fn main(@builtin(global_invocation_id) 
id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -1934,11 +1950,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -1974,11 +1994,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -2632,11 +2656,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -2672,11 +2700,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ 
(xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -3333,11 +3365,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -3370,11 +3406,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -4031,11 +4071,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -4071,11 +4115,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = 
v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -4735,11 +4783,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -4775,11 +4827,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -5439,11 +5495,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -5476,11 +5536,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = 
o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -6140,11 +6204,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -6177,11 +6245,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -6838,11 +6910,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -6878,11 +6954,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 
= o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -7533,11 +7613,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -7573,11 +7657,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; @@ -8243,11 +8331,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v10 ^ v20; @@ -8283,11 +8375,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v30; - o1 = v21 + v31; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + 
v_2021.y = v21; + v_3031.x = v30; + v_3031.y = v31; + v_2021 = v_2021 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_3031.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v10 ^ v20; -- 2.34.1