From: Chris Duncan Date: Tue, 14 Jan 2025 16:33:13 +0000 (-0800) Subject: Replace more scalar addition with vector addition. X-Git-Tag: v2.0.0~131 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=ee940cdb9d1f34367efb800654862fbef1100402;p=nano-pow.git Replace more scalar addition with vector addition. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index d3b1628..c8b6f83 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -715,11 +715,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -755,11 +759,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -1451,11 +1459,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -1491,11 +1503,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -2187,11 +2203,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -2224,11 +2244,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -2917,11 +2941,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -2954,11 +2982,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -3650,11 +3682,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -3690,11 +3726,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -4383,11 +4423,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -4423,11 +4467,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -5116,11 +5164,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -5153,11 +5205,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -5849,11 +5905,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -5889,11 +5949,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -6582,11 +6646,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -6619,11 +6687,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -7315,11 +7387,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -7355,11 +7431,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -8045,11 +8125,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -8085,11 +8169,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16; @@ -8781,11 +8869,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = xor0; // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 24) xor0 = v14 ^ v16; @@ -8821,11 +8913,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v26; - o1 = v17 + v27; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2627.x = v26; + v_2627.y = v27; + v_1617 = v_1617 + v_2627 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2627.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v26 = v_2627.x; + v27 = v_2627.y; // b = rotr64(b ^ c, 63) xor0 = v14 ^ v16;