From: Chris Duncan Date: Tue, 14 Jan 2025 16:28:14 +0000 (-0800) Subject: Replace more scalar addition with vector addition. X-Git-Tag: v2.0.0~133 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=9410edf379d49716fe2b2be4712d7c2a106b015a;p=nano-pow.git Replace more scalar addition with vector addition. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index ff73ed2..c3fd9de 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -592,11 +592,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -628,11 +632,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1310,11 +1318,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m0; @@ -1343,11 +1355,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m4; @@ -2019,11 +2035,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m6; @@ -2052,11 +2072,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -2725,11 +2749,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2761,11 +2789,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3431,11 +3463,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -3467,11 +3503,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4140,11 +4180,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4176,11 +4220,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4852,11 +4900,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4888,11 +4940,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m6; @@ -5561,11 +5617,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5597,11 +5657,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m8; @@ -6270,11 +6334,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -6306,11 +6374,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -6979,11 +7051,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7015,11 +7091,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -7682,11 +7762,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7718,11 +7802,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -8400,11 +8488,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m0; @@ -8433,11 +8525,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v13 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v12; - o1 = v3 + v13; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1213.x = v12; + v_1213.y = v13; + v_23 = v_23 + v_1213 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1213.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v12 = v_1213.x; + v13 = v_1213.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m4;