From 09ab6f27fce2e8a475b78a315b1ecd9e78bbab3f Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 10:46:26 -0800 Subject: [PATCH] Replace more 32-bit scalar rotations with vector rotations. --- src/shaders/compute.wgsl | 132 +++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 48 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index a484a8d..b26bb20 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -518,10 +518,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -1267,10 +1270,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -1998,10 +2004,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -2735,10 +2744,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -3475,10 +3487,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -4230,10 +4245,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -4973,10 +4991,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -5707,10 +5728,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -6447,10 +6471,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -7193,10 +7220,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -7936,10 +7966,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -8685,10 +8718,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; -- 2.34.1