zoso.dev Git - nano-pow.git/commitdiff
Replace more 32-bit scalar rotations with vector rotations.
author Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:49:23 +0000 (10:49 -0800)
committer Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:49:23 +0000 (10:49 -0800)
src/shaders/compute.wgsl

index ad68c439ec3fa99994f3c1e2c6f8ca83a475359e..e89e4f3be742f89fadef1eb2868282926376c595 100644 (file)
@@ -707,10 +707,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -1456,10 +1459,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -2196,10 +2202,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -2939,10 +2948,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -3694,10 +3706,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -4449,10 +4464,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -5195,10 +5213,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -5941,10 +5962,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -6687,10 +6711,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -7433,10 +7460,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -8185,10 +8215,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;
@@ -8934,10 +8967,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v5 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v4;
-       xor1 = v27 ^ v5;
-       v26 = xor1;
-       v27 = xor0;
+       v_45.x = v4;
+       v_45.y = v5;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_45;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1617.x = v16;