From 080b1aa04c9f039fbcd087088adc61e1aee8acd8 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 08:16:14 -0800 Subject: [PATCH] Replace more scalar addition with vector addition. --- src/shaders/compute.wgsl | 336 +++++++++++++++++++++++++-------------- 1 file changed, 216 insertions(+), 120 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index ea92d9a..ec65fff 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -331,11 +331,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -371,11 +375,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -1012,11 +1020,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -1052,11 +1064,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -1681,11 +1697,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -1718,11 +1738,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -2347,11 +2371,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -2387,11 +2415,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -3016,11 +3048,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -3053,11 +3089,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -3685,11 +3725,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -3725,11 +3769,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -4357,11 +4405,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -4397,11 +4449,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -5029,11 +5085,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -5066,11 +5126,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -5698,11 +5762,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -5735,11 +5803,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -6361,11 +6433,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -6401,11 +6477,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -7021,11 +7101,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -7061,11 +7145,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; @@ -7702,11 +7790,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = xor0; // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v20; @@ -7742,11 +7834,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v20 + v28; - o1 = v21 + v29; - o1 = o1 + select(0u, 1u, o0 < v20); - v20 = o0; - v21 = o1; + v_2021.x = v20; + v_2021.y = v21; + v_2829.x = v28; + v_2829.y = v29; + v_2021 = v_2021 + v_2829 + select(vec2(0u), vec2(0u, 1u), v_2021.x + v_2829.x < v_2021.x); + v20 = v_2021.x; + v21 = v_2021.y; + v28 = v_2829.x; + v29 = v_2829.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v20; -- 2.34.1