From 0aeccea1eced8156586c73457aa9faad845fcb1b Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 10:49:23 -0800 Subject: [PATCH] Replace more 32-bit scalar rotations with vector rotations. --- src/shaders/compute.wgsl | 132 +++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 48 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index ad68c43..e89e4f3 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -707,10 +707,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -1456,10 +1459,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -2196,10 +2202,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -2939,10 +2948,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -3694,10 +3706,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -4449,10 +4464,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -5195,10 +5213,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -5941,10 +5962,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -6687,10 +6711,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -7433,10 +7460,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -8185,10 +8215,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -8934,10 +8967,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; -- 2.34.1