From cb178a8a636ec4dd71f9caeaf27777ebf8cc7013 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 08:19:59 -0800 Subject: [PATCH] Replace more scalar addition with vector addition. --- src/shaders/compute.wgsl | 336 +++++++++++++++++++++++++-------------- 1 file changed, 216 insertions(+), 120 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index ec65fff..3b9c769 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -400,11 +400,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -436,11 +440,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1089,11 +1097,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1125,11 +1137,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1763,11 +1779,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1799,11 +1819,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -2440,11 +2464,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2476,11 +2504,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3114,11 +3146,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -3150,11 +3186,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3794,11 +3834,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -3830,11 +3874,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+1]] o0 = v6 + m6; @@ -4474,11 +4522,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v6 + m8; @@ -4507,11 +4559,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -5151,11 +5207,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v6 + m6; @@ -5184,11 +5244,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -5828,11 +5892,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v6 + m0; @@ -5861,11 +5929,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -6502,11 +6574,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v6 + m2; @@ -6535,11 +6611,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -7170,11 +7250,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7206,11 +7290,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -7859,11 +7947,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7895,11 +7987,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v6 + v14; - o1 = v7 + v15; - o1 = o1 + select(0u, 1u, o0 < v6); - v6 = o0; - v7 = o1; + v_67.x = v6; + v_67.y = v7; + v_1415.x = v14; + v_1415.y = v15; + v_67 = v_67 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_67.x + v_1415.x < v_67.x); + v6 = v_67.x; + v7 = v_67.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing -- 2.34.1