zoso.dev Git - nano-pow.git/commitdiff
Replace more 32-bit scalar rotations with vector rotations.
author Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:31:48 +0000 (10:31 -0800)
committer Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 18:31:48 +0000 (10:31 -0800)
src/shaders/compute.wgsl

index d202d4291f74b63e7090ca665981f6862f543664..e025ff92c1206ff9443c74daa4461ddb0e1b3cc2 100644 (file)
@@ -236,10 +236,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -964,10 +967,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -1698,10 +1704,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -2417,10 +2426,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -3157,10 +3169,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -3897,10 +3912,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -4628,10 +4646,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -5362,10 +5383,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -6093,10 +6117,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -6830,10 +6857,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -7564,10 +7594,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -8292,10 +8325,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v3 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v26 ^ v2;
-       xor1 = v27 ^ v3;
-       v26 = xor1;
-       v27 = xor0;
+       v_23.x = v2;
+       v_23.y = v3;
+       v_2627.x = v26;
+       v_2627.y = v27;
+       v_2627 = v_2627 ^ v_23;
+       v26 = v_2627.y;
+       v27 = v_2627.x;
 
        // c = c + d
        v_1819.x = v18;