From e658330eed9429b29ec4984c587d36577b2af2a7 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 09:50:15 -0800 Subject: [PATCH] Replace some 32-bit scalar rotations with vector rotations. --- src/shaders/compute.wgsl | 84 +++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 48cfd75..2349da9 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -145,10 +145,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -878,10 +877,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -1608,10 +1606,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -2329,10 +2326,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -3059,10 +3055,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -3804,10 +3799,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -4537,10 +4531,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -5267,10 +5260,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -5997,10 +5989,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -6727,10 +6718,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -7463,10 +7453,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -8196,10 +8185,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; -- 2.34.1