From: Chris Duncan Date: Tue, 14 Jan 2025 16:21:51 +0000 (-0800) Subject: Replace more scalar addition with vector addition. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=b299692f121ca1075f8a3a7f18d5e19aabd145aa;p=nano-pow.git Replace more scalar addition with vector addition. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 3b9c769..53610b6 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -427,11 +427,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -467,11 +471,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -1124,11 +1132,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -1164,11 +1176,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -1806,11 +1822,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -1846,11 +1866,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -2491,11 +2515,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -2531,11 +2559,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -3173,11 +3205,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -3213,11 +3249,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -3861,11 +3901,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -3898,11 +3942,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -4546,11 +4594,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -4586,11 +4638,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -5231,11 +5287,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -5271,11 +5331,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -5916,11 +5980,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -5956,11 +6024,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -6598,11 +6670,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -6638,11 +6714,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -7277,11 +7357,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -7317,11 +7401,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22; @@ -7974,11 +8062,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = xor0; // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v22; @@ -8014,11 +8106,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v31 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v30; - o1 = v23 + v31; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_3031.x = v30; + v_3031.y = v31; + v_2223 = v_2223 + v_3031 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_3031.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v30 = v_3031.x; + v31 = v_3031.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v22;