]> zoso.dev Git - nano-pow.git/commitdiff
Replace more 32-bit scalar rotations with vector rotations.
author: Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:46:26 +0000 (10:46 -0800)
committer: Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:46:26 +0000 (10:46 -0800)
src/shaders/compute.wgsl

index a484a8df65b093910edafb315a176a2801768bdd..b26bb20990e6c6ad1091d710ee72684ea950a3bb 100644 (file)
@@ -518,10 +518,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -1267,10 +1270,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v1 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -1998,10 +2004,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -2735,10 +2744,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v1 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -3475,10 +3487,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -4230,10 +4245,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v1 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -4973,10 +4991,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v1 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -5707,10 +5728,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -6447,10 +6471,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -7193,10 +7220,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -7936,10 +7966,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // // skip since adding 0u does nothing
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;
@@ -8685,10 +8718,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v1 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v30 ^ v0;
-       xor1 = v31 ^ v1;
-       v30 = xor1;
-       v31 = xor0;
+       v_01.x = v0;
+       v_01.y = v1;
+       v_3031.x = v30;
+       v_3031.y = v31;
+       v_3031 = v_3031 ^ v_01;
+       v30 = v_3031.y;
+       v31 = v_3031.x;
 
        // c = c + d
        v_2021.x = v20;