From: Chris Duncan Date: Tue, 14 Jan 2025 18:49:23 +0000 (-0800) Subject: Replace more 32-bit scalar rotations with vector rotations. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=0aeccea1eced8156586c73457aa9faad845fcb1b;p=nano-pow.git Replace more 32-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index ad68c43..e89e4f3 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -707,10 +707,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -1456,10 +1459,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -2196,10 +2202,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -2939,10 +2948,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -3694,10 +3706,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -4449,10 +4464,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -5195,10 +5213,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -5941,10 +5962,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -6687,10 +6711,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -7433,10 +7460,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -8185,10 +8215,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16; @@ -8934,10 +8967,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v4; - xor1 = v27 ^ v5; - v26 = xor1; - v27 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_45; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1617.x = v16;