zoso.dev Git - nano-pow.git/commitdiff
Replace more 32-bit scalar rotations with vector rotations.
author Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:42:22 +0000 (10:42 -0800)
committer Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:42:22 +0000 (10:42 -0800)
src/shaders/compute.wgsl

index e025ff92c1206ff9443c74daa4461ddb0e1b3cc2..8b5ef92f3de84b1e8043f042acbcb9d5509455b8 100644 (file)
@@ -329,10 +329,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -1066,10 +1069,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -1800,10 +1806,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -2522,10 +2531,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -3265,10 +3277,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -4008,10 +4023,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -4745,10 +4763,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -5482,10 +5503,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -6216,10 +6240,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -6953,10 +6980,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -7687,10 +7717,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -8424,10 +8457,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v4;
-       xor1 = v29 ^ v5;
-       v28 = xor1;
-       v29 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_45;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_2021.x = v20;