From: Chris Duncan Date: Tue, 14 Jan 2025 16:31:19 +0000 (-0800) Subject: Replace more scalar addition with vector addition. X-Git-Tag: v2.0.0~132 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=a23ab317276d500215b2dbf60e0068f3470031e3;p=nano-pow.git Replace more scalar addition with vector addition. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index c3fd9de..d3b1628 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -619,11 +619,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -659,11 +663,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -680,11 +688,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -716,11 +728,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -1342,11 +1358,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -1379,11 +1399,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -1400,11 +1424,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -1436,11 +1464,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -2059,11 +2091,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -2099,11 +2135,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -2120,11 +2160,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -2156,11 +2200,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m2; @@ -2776,11 +2824,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -2816,11 +2868,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -2837,11 +2893,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m8; @@ -2870,11 +2930,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m0; @@ -3490,11 +3554,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -3530,11 +3598,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -3551,11 +3623,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -3587,11 +3663,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4207,11 +4287,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -4247,11 +4331,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -4268,11 +4356,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -4304,11 +4396,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -4927,11 +5023,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -4964,11 +5064,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -4985,11 +5089,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5021,11 +5129,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m4; @@ -5644,11 +5756,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -5681,11 +5797,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -5702,11 +5822,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -5738,11 +5862,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -6361,11 +6489,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -6401,11 +6533,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -6422,11 +6558,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m2; @@ -6455,11 +6595,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+1]] o0 = v4 + m8; @@ -7078,11 +7222,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -7118,11 +7266,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -7139,11 +7291,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // a = a + m[sigma[r][2*i+0]] o0 = v4 + m6; @@ -7172,11 +7328,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -7789,11 +7949,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -7829,11 +7993,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -7850,11 +8018,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -7886,11 +8058,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing @@ -8512,11 +8688,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = xor0; // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 24) xor0 = v12 ^ v22; @@ -8549,11 +8729,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v25 = (xor1 >> 16u) ^ (xor0 << 16u); // c = c + d - o0 = v22 + v24; - o1 = v23 + v25; - o1 = o1 + select(0u, 1u, o0 < v22); - v22 = o0; - v23 = o1; + v_2223.x = v22; + v_2223.y = v23; + v_2425.x = v24; + v_2425.y = v25; + v_2223 = v_2223 + v_2425 + select(vec2(0u), vec2(0u, 1u), v_2223.x + v_2425.x < v_2223.x); + v22 = v_2223.x; + v23 = v_2223.y; + v24 = v_2425.x; + v25 = v_2425.y; // b = rotr64(b ^ c, 63) xor0 = v12 ^ v22; @@ -8570,11 +8754,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { */ // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+0]] // // skip since adding 0u does nothing @@ -8606,11 +8794,15 @@ fn main(@builtin(global_invocation_id) id: vec3) { v15 = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b - o0 = v4 + v14; - o1 = v5 + v15; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_1415.x = v14; + v_1415.y = v15; + v_45 = v_45 + v_1415 + select(vec2(0u), vec2(0u, 1u), v_45.x + v_1415.x < v_45.x); + v4 = v_45.x; + v5 = v_45.y; + v14 = v_1415.x; + v15 = v_1415.y; // // a = a + m[sigma[r][2*i+1]] // // skip since adding 0u does nothing