From 9a51ccb5b5eabac565ba912d5b5799e2f2ff30c6 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 11:20:39 -0800 Subject: [PATCH] Replace some 24-bit and 32-bit scalar rotations with vector rotations. --- src/shaders/compute.wgsl | 144 +++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 52 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index a233b14..5a3d119 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -164,10 +164,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = v_2425.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v16; - xor1 = v9 ^ v17; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1617.x = v16; + v_1617.y = v17; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1617; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_89.x = v8; @@ -807,10 +811,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -1559,10 +1566,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -2299,10 +2309,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -3045,10 +3058,13 @@ fn 
main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -3803,10 +3819,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -4561,10 +4580,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -5310,10 +5332,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -6059,10 +6084,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -6808,10 +6836,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -7560,10 +7591,13 @@ 
fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -8315,10 +8349,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; @@ -9067,10 +9104,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v7 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v6; - xor1 = v29 ^ v7; - v28 = xor1; - v29 = xor0; + v_67.x = v6; + v_67.y = v7; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_67; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_1819.x = v18; -- 2.34.1