zoso.dev Git - nano-pow.git/commitdiff
Replace some 24-bit and 32-bit scalar rotations with vector rotations.
author Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 19:20:39 +0000 (11:20 -0800)
committer Chris Duncan <chris@zoso.dev>
Tue, 14 Jan 2025 19:20:39 +0000 (11:20 -0800)
src/shaders/compute.wgsl

index a233b1458a86d76cc8c6e969fd0109d236a310c2..5a3d119c5d9390a50c3032c153ef021987b4b90a 100644 (file)
@@ -164,10 +164,14 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v25 = v_2425.y;
 
        // b = rotr64(b ^ c, 24)
-       xor0 = v8 ^ v16;
-       xor1 = v9 ^ v17;
-       v8 = (xor0 >> 24u) ^ (xor1 << 8u);
-       v9 = (xor1 >> 24u) ^ (xor0 << 8u);
+       v_1617.x = v16;
+       v_1617.y = v17;
+       v_89.x = v8;
+       v_89.y = v9;
+       xor = v_89 ^ v_1617;
+       v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));
+       v8 = v_89.x;
+       v9 = v_89.y;
 
        // a = a + b
        v_89.x = v8;
@@ -807,10 +811,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -1559,10 +1566,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -2299,10 +2309,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -3045,10 +3058,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -3803,10 +3819,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -4561,10 +4580,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -5310,10 +5332,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -6059,10 +6084,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -6808,10 +6836,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -7560,10 +7591,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -8315,10 +8349,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;
@@ -9067,10 +9104,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
        // v7 = o1;
 
        // d = rotr64(d ^ a, 32)
-       xor0 = v28 ^ v6;
-       xor1 = v29 ^ v7;
-       v28 = xor1;
-       v29 = xor0;
+       v_67.x = v6;
+       v_67.y = v7;
+       v_2829.x = v28;
+       v_2829.y = v29;
+       v_2829 = v_2829 ^ v_67;
+       v28 = v_2829.y;
+       v29 = v_2829.x;
 
        // c = c + d
        v_1819.x = v18;