From: Chris Duncan Date: Tue, 14 Jan 2025 19:29:22 +0000 (-0800) Subject: Replace more 24-bit scalar rotations with vector rotations. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=00f73edbad590fcf9bbfa95d60fef3c9ed463acb;p=nano-pow.git Replace more 24-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 2981957..a8e99a9 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -752,10 +752,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -1527,10 +1531,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -2293,10 +2301,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -3062,10 +3074,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -3843,10 +3859,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -4624,10 +4644,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -5396,10 +5420,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -6168,10 +6196,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -6940,10 +6972,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -7712,10 +7748,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -8490,10 +8530,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4; @@ -9265,10 +9309,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v27 = v_2627.y; // b = rotr64(b ^ c, 24) - xor0 = v14 ^ v16; - xor1 = v15 ^ v17; - v14 = (xor0 >> 24u) ^ (xor1 << 8u); - v15 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_1415.x = v14; + v_1415.y = v15; + xor = v_1415 ^ v_1617; + v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v14 = v_1415.x; + v15 = v_1415.y; // a = a + b v_45.x = v4;