From 703d431ff98ebf31fd8a630643913dcc81bdff47 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 08:13:34 -0800 Subject: [PATCH] Replace more scalar addition with vector addition. --- src/shaders/compute.wgsl | 336 +++++++++++++++++++++++++-------------- 1 file changed, 216 insertions(+), 120 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 8dc9871..ea92d9a 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -307,11 +307,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m8; @@ -340,11 +344,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -977,11 +985,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1013,11 +1025,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1638,11 +1654,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1674,11 +1694,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m4; @@ -2296,11 +2320,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2332,11 +2360,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -2960,11 +2992,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m4; @@ -2993,11 +3029,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m8; @@ -3621,11 +3661,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m0; @@ -3654,11 +3698,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4282,11 +4330,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4318,11 +4370,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4946,11 +5002,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4982,11 +5042,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m2; @@ -5607,11 +5671,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5643,11 +5711,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m6; @@ -6262,11 +6334,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -6298,11 +6374,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -6917,11 +6997,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m8; @@ -6950,11 +7034,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -7587,11 +7675,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7623,11 +7715,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v12; - o1 = v5 + v13; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1213.x = v12; + v_1213.y = v13; + v_45 = v_45 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1213.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing -- 2.34.1