From: Chris Duncan Date: Tue, 14 Jan 2025 14:54:11 +0000 (-0800) Subject: Replace more scalar addition with vector addition. Interestingly, found that commenti... X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=00da999f8bb23486dc37519287bc09b802ae0772;p=nano-pow.git Replace more scalar addition with vector addition. Interestingly, found that commenting out ` v24 = v_2425.x;` or `v25 = v_2425.y;` still results in a correct nonce. This presents an opportunity to prune even more instructions, so this will be pursued in a separate branch. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 4dcc796..e59c9b7 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -127,8 +127,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - // v1 = v1 + v9 + select(0u, 1u, v0 + v8 < v0); - // v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); v0 = v_01.x; v1 = v_01.y; @@ -150,11 +152,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -163,9 +169,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // a = a + m[sigma[r][2*i+1]] o0 = v0 + m2; @@ -181,11 +193,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -202,19 +218,16 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); v2 = v_23.x; v3 = v_23.y; v10 = v_1011.x; v11 = v_1011.y; - // o0 = v2 + v10; - // o1 = v3 + v11; - // o1 = o1 + select(0u, 1u, o0 < v2); - // v2 = o0; - // v3 = o1; - - // a = a + m[sigma[r][2*i+0]] o0 = v2 + m4; o1 = v3 + m5; @@ -242,11 +255,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m6; @@ -764,9 +781,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -785,11 +808,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -798,9 +825,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -819,11 +852,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -840,11 +877,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m8; @@ -873,11 +914,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1389,9 +1434,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1410,11 +1461,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -1423,9 +1478,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1444,11 +1505,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -1465,11 +1530,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1501,11 +1570,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m0; @@ -2014,9 +2087,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2035,11 +2114,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -2048,9 +2131,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -2069,11 +2158,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -2090,11 +2183,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m6; @@ -2123,11 +2220,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m2; @@ -2639,9 +2740,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2660,11 +2767,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -2673,9 +2784,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // a = a + m[sigma[r][2*i+1]] o0 = v0 + m0; @@ -2691,11 +2808,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -2712,11 +2833,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2748,11 +2873,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3264,9 +3393,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // a = a + m[sigma[r][2*i+0]] o0 = v0 + m4; @@ -3282,11 +3417,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -3295,9 +3434,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3316,11 +3461,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -3337,11 +3486,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -3373,11 +3526,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3889,9 +4046,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -3910,11 +4073,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -3923,9 +4090,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -3944,11 +4117,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -3965,11 +4142,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m2; @@ -3998,11 +4179,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4514,9 +4699,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4535,11 +4726,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -4548,9 +4743,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4569,11 +4770,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -4590,11 +4795,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4626,11 +4835,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -5139,9 +5352,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5160,11 +5379,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -5173,9 +5396,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -5194,11 +5423,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -5215,11 +5448,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5251,11 +5488,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -5764,9 +6005,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5785,11 +6032,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -5798,9 +6049,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // a = a + m[sigma[r][2*i+1]] o0 = v0 + m4; @@ -5816,11 +6073,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -5837,11 +6098,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5873,11 +6138,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m8; @@ -6389,9 +6658,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // a = a + m[sigma[r][2*i+0]] o0 = v0 + m0; @@ -6407,11 +6682,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -6420,9 +6699,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // a = a + m[sigma[r][2*i+1]] o0 = v0 + m2; @@ -6438,11 +6723,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -6459,11 +6748,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m4; @@ -6492,11 +6785,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+1]] o0 = v2 + m6; @@ -7014,9 +7311,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7035,11 +7338,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v8 ^ v16; @@ -7048,9 +7355,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v9 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - v1 = v1 + v9; - v1 = v1 + select(0u, 1u, v0 + v8 < v0); - v0 = v0 + v8; + v_01.x = v0; + v_01.y = v1; + v_89.x = v8; + v_89.y = v9; + v_01 = v_01 + v_89 + select(vec2(0u), vec2(0u, 1u), v_01.x + v_89.x < v_01.x); + v0 = v_01.x; + v1 = v_01.y; + v8 = v_89.x; + v9 = v_89.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -7069,11 +7382,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v16 + v24; - o1 = v17 + v25; - o1 = o1 + select(0u, 1u, o0 < v16); - v16 = o0; - v17 = o1; + v_1617.x = v16; + v_1617.y = v17; + v_2425.x = v24; + v_2425.y = v25; + v_1617 = v_1617 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_1617.x + v_2425.x < v_1617.x); + v16 = v_1617.x; + v17 = v_1617.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v8 ^ v16; @@ -7090,11 +7407,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // a = a + m[sigma[r][2*i+0]] o0 = v2 + m8; @@ -7123,11 +7444,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v11 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v2 + v10; - o1 = v3 + v11; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_1011.x = v10; + v_1011.y = v11; + v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); + v2 = v_23.x; + v3 = v_23.y; + v10 = v_1011.x; + v11 = v_1011.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing