From 16c60de6f5ac874dc1c12539793a6c8c81c06a03 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 10:47:56 -0800 Subject: [PATCH] Replace more 32-bit scalar rotations with vector rotations. --- src/shaders/compute.wgsl | 132 +++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 48 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index b26bb20..ad68c43 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -608,10 +608,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -1357,10 +1360,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -2091,10 +2097,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -2834,10 +2843,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -3583,10 +3595,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -4335,10 +4350,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -5081,10 +5099,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -5824,10 +5845,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -6567,10 +6591,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -7310,10 +7337,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -8056,10 +8086,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -8805,10 +8838,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; -- 2.34.1