From: Chris Duncan Date: Tue, 14 Jan 2025 18:42:22 +0000 (-0800) Subject: Replace more 32-bit scalar rotations with vector rotations. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=4107f71f51f1ad6d102acc564fd8abc576627945;p=nano-pow.git Replace more 32-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index e025ff9..8b5ef92 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -329,10 +329,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -1066,10 +1069,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -1800,10 +1806,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -2522,10 +2531,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -3265,10 +3277,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -4008,10 +4023,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -4745,10 +4763,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -5482,10 +5503,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -6216,10 +6240,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -6953,10 +6980,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -7687,10 +7717,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20; @@ -8424,10 +8457,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v5 = o1; // d = rotr64(d ^ a, 32) - xor0 = v28 ^ v4; - xor1 = v29 ^ v5; - v28 = xor1; - v29 = xor0; + v_45.x = v4; + v_45.y = v5; + v_2829.x = v28; + v_2829.y = v29; + v_2829 = v_2829 ^ v_45; + v28 = v_2829.y; + v29 = v_2829.x; // c = c + d v_2021.x = v20;