From: Chris Duncan Date: Thu, 16 Jan 2025 13:42:28 +0000 (-0800) Subject: Replace 32-bit rotations using two assignments with one assignment. X-Git-Tag: v2.0.0~50 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=20850a682d32b9147bbca4ce7512e90aa5b6911b;p=nano-pow.git Replace 32-bit rotations using two assignments with one assignment. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 8cee7b9..6629595 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -118,8 +118,7 @@ fn main(id: vec3) { v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -160,8 +159,7 @@ fn main(id: vec3) { v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -202,8 +200,7 @@ fn main(id: vec3) { v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -244,8 +241,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -286,8 +282,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -328,8 +323,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -370,8 +364,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -412,8 +405,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -458,8 +450,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -500,8 +491,7 @@ fn main(id: vec3) { v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -542,8 +532,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -584,8 +573,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -626,8 +614,7 @@ fn main(id: vec3) { v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -668,8 +655,7 @@ fn main(id: vec3) { v1 = v1 + nonce + vec2(0u, u32(v1.x + nonce.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -710,8 +696,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -752,8 +737,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -798,8 +782,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -840,8 +823,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -882,8 +864,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -924,8 +905,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -966,8 +946,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -1008,8 +987,7 @@ fn main(id: vec3) { v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -1050,8 +1028,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -1092,8 +1069,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -1138,8 +1114,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -1180,8 +1155,7 @@ fn main(id: vec3) { v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -1222,8 +1196,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -1264,8 +1237,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -1306,8 +1278,7 @@ fn main(id: vec3) { v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -1348,8 +1319,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -1390,8 +1360,7 @@ fn main(id: vec3) { v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -1432,8 +1401,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -1478,8 +1446,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -1520,8 +1487,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -1562,8 +1528,7 @@ fn main(id: vec3) { v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -1604,8 +1569,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -1646,8 +1610,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -1688,8 +1651,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -1730,8 +1692,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -1772,8 +1733,7 @@ fn main(id: vec3) { v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -1818,8 +1778,7 @@ fn main(id: vec3) { v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -1860,8 +1819,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -1902,8 +1860,7 @@ fn main(id: vec3) { v2 = v2 + nonce + vec2(0u, u32(v2.x + nonce.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -1944,8 +1901,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -1986,8 +1942,7 @@ fn main(id: vec3) { v0 = v0 + m4 + vec2(0u, u32(v0.x + m4.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -2028,8 +1983,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -2070,8 +2024,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -2112,8 +2065,7 @@ fn main(id: vec3) { v3 = v3 + m1 + vec2(0u, u32(v3.x + m1.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -2158,8 +2110,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -2200,8 +2151,7 @@ fn main(id: vec3) { v1 = v1 + m1 + vec2(0u, u32(v1.x + m1.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -2242,8 +2192,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -2284,8 +2233,7 @@ fn main(id: vec3) { v3 = v3 + m4 + vec2(0u, u32(v3.x + m4.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -2326,8 +2274,7 @@ fn main(id: vec3) { v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -2368,8 +2315,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -2410,8 +2356,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -2452,8 +2397,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -2498,8 +2442,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -2540,8 +2483,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -2582,8 +2524,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -2624,8 +2565,7 @@ fn main(id: vec3) { v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -2666,8 +2606,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -2708,8 +2647,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -2750,8 +2688,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -2792,8 +2729,7 @@ fn main(id: vec3) { v3 = v3 + m2 + vec2(0u, u32(v3.x + m2.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -2838,8 +2774,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -2880,8 +2815,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -2922,8 +2856,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -2964,8 +2897,7 @@ fn main(id: vec3) { v3 = v3 + nonce + vec2(0u, u32(v3.x + nonce.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -3006,8 +2938,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -3048,8 +2979,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -3090,8 +3020,7 @@ fn main(id: vec3) { v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -3132,8 +3061,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -3178,8 +3106,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -3220,8 +3147,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -3262,8 +3188,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -3304,8 +3229,7 @@ fn main(id: vec3) { v3 = v3 + m1 + vec2(0u, u32(v3.x + m1.x < v3.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -3346,8 +3270,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -3388,8 +3311,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -3430,8 +3352,7 @@ fn main(id: vec3) { v2 = v2 + m3 + vec2(0u, u32(v2.x + m3.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -3472,8 +3393,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -3518,8 +3438,7 @@ fn main(id: vec3) { v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -3560,8 +3479,7 @@ fn main(id: vec3) { v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -3602,8 +3520,7 @@ fn main(id: vec3) { v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -3644,8 +3561,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -3686,8 +3602,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -3728,8 +3643,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -3770,8 +3684,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -3812,8 +3725,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); @@ -3858,8 +3770,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v12 ^ v0; - v12 = xor.yx; + v12 = v12.yx ^ v0.yx; // c = c + d v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); @@ -3900,8 +3811,7 @@ fn main(id: vec3) { v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v13 ^ v1; - v13 = xor.yx; + v13 = v13.yx ^ v1.yx; // c = c + d v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); @@ -3942,8 +3852,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v2; - v14 = xor.yx; + v14 = v14.yx ^ v2.yx; // c = c + d v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); @@ -3984,8 +3893,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v15 ^ v3; - v15 = xor.yx; + v15 = v15.yx ^ v3.yx; // c = c + d v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); @@ -4026,8 +3934,7 @@ fn main(id: vec3) { v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 32) - xor = v15 ^ v0; - v15 = xor.yx; + v15 = v15.yx ^ v0.yx; // c = c + d v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); @@ -4068,8 +3975,7 @@ fn main(id: vec3) { v1 = v1 + nonce + vec2(0u, u32(v1.x + nonce.x < v1.x)); // d = rotr64(d ^ a, 32) - xor = v12 ^ v1; - v12 = xor.yx; + v12 = v12.yx ^ v1.yx; // c = c + d v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); @@ -4110,8 +4016,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v13 ^ v2; - v13 = xor.yx; + v13 = v13.yx ^ v2.yx; // c = c + d v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); @@ -4152,8 +4057,7 @@ fn main(id: vec3) { // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor = v14 ^ v3; - v14 = xor.yx; + v14 = v14.yx ^ v3.yx; // c = c + d v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));