From c2da559b55aa7f438d0728d71b19672880fa8d78 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Wed, 15 Jan 2025 20:57:36 -0800 Subject: [PATCH] Replace selects with simple carry arithmetic since select is still technically a branch. --- src/shaders/compute.wgsl | 888 +++++++++++++++++++-------------------- 1 file changed, 444 insertions(+), 444 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 2fa1078..a476022 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -112,34 +112,34 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + nonce + select(vec2(0u), vec2(0u, 1u), v0.x + nonce.x < v0.x); + v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v12 ^ v0; v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + m1 + select(vec2(0u), vec2(0u, 1u), v0.x + m1.x < v0.x); + v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v0; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -154,34 +154,34 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m2 + select(vec2(0u), vec2(0u, 1u), v1.x + m2.x < v1.x); + v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v1; v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m3 + select(vec2(0u), vec2(0u, 1u), v1.x + m3.x < v1.x); + v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v1; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -196,24 +196,24 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + m4 + select(vec2(0u), vec2(0u, 1u), v2.x + m4.x < v2.x); + v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v2; v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -223,7 +223,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -238,7 +238,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -248,14 +248,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -265,7 +265,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -280,7 +280,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -290,14 +290,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -307,7 +307,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -322,7 +322,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -332,14 +332,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -349,7 +349,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -364,7 +364,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -374,14 +374,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -391,7 +391,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -406,7 +406,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -416,14 +416,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -433,7 +433,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -452,7 +452,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -462,14 +462,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -479,7 +479,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -494,24 +494,24 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m4 + select(vec2(0u), vec2(0u, 1u), v1.x + m4.x < v1.x); + v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v1; v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -521,7 +521,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -536,7 +536,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -546,14 +546,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -563,7 +563,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -578,7 +578,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -588,14 +588,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -605,7 +605,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -620,24 +620,24 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + m1 + select(vec2(0u), vec2(0u, 1u), v0.x + m1.x < v0.x); + v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v0; v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -647,7 +647,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -662,34 +662,34 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + nonce + select(vec2(0u), vec2(0u, 1u), v1.x + nonce.x < v1.x); + v1 = v1 + nonce + vec2(0u, u32(v1.x + nonce.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v12 ^ v1; v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m2 + select(vec2(0u), vec2(0u, 1u), v1.x + m2.x < v1.x); + v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v1; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -704,7 +704,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -714,14 +714,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -731,7 +731,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -746,7 +746,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -756,24 +756,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] - v3 = v3 + m3 + select(vec2(0u), vec2(0u, 1u), v3.x + m3.x < v3.x); + v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v3; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -792,7 +792,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -802,14 +802,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -819,7 +819,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -834,7 +834,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -844,24 +844,24 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + nonce + select(vec2(0u), vec2(0u, 1u), v1.x + nonce.x < v1.x); + v1 = v1 + nonce + vec2(0u, u32(v1.x + nonce.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v1; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -876,7 +876,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -886,24 +886,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m2 + select(vec2(0u), vec2(0u, 1u), v2.x + m2.x < v2.x); + v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v2; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -918,7 +918,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -928,14 +928,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -945,7 +945,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -960,7 +960,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -970,14 +970,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -987,7 +987,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -1002,24 +1002,24 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m3 + select(vec2(0u), vec2(0u, 1u), v1.x + m3.x < v1.x); + v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v12 ^ v1; v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1029,7 +1029,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -1044,7 +1044,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1054,24 +1054,24 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m1 + select(vec2(0u), vec2(0u, 1u), v2.x + m1.x < v2.x); + v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v2; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -1086,7 +1086,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1096,24 +1096,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] - v3 = v3 + m4 + select(vec2(0u), vec2(0u, 1u), v3.x + m4.x < v3.x); + v3 = v3 + m4 + vec2(0u, u32(v3.x + m4.x < v3.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v3; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -1132,7 +1132,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1142,14 +1142,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1159,7 +1159,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -1174,34 +1174,34 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m3 + select(vec2(0u), vec2(0u, 1u), v1.x + m3.x < v1.x); + v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v1; v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m1 + select(vec2(0u), vec2(0u, 1u), v1.x + m1.x < v1.x); + v1 = v1 + m1 + vec2(0u, u32(v1.x + m1.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v1; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -1216,7 +1216,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1226,14 +1226,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1243,7 +1243,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -1258,7 +1258,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1268,14 +1268,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1285,7 +1285,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -1300,24 +1300,24 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + m2 + select(vec2(0u), vec2(0u, 1u), v0.x + m2.x < v0.x); + v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v0; v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1327,7 +1327,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -1342,7 +1342,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1352,14 +1352,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1369,7 +1369,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -1384,34 +1384,34 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + m4 + select(vec2(0u), vec2(0u, 1u), v2.x + m4.x < v2.x); + v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v2; v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + nonce + select(vec2(0u), vec2(0u, 1u), v2.x + nonce.x < v2.x); + v2 = v2 + nonce + vec2(0u, u32(v2.x + nonce.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v2; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -1426,7 +1426,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1436,14 +1436,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1453,7 +1453,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -1472,7 +1472,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1482,24 +1482,24 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + nonce + select(vec2(0u), vec2(0u, 1u), v0.x + nonce.x < v0.x); + v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v0; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -1514,7 +1514,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1524,14 +1524,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1541,7 +1541,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -1556,34 +1556,34 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + m2 + select(vec2(0u), vec2(0u, 1u), v2.x + m2.x < v2.x); + v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v2; v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m4 + select(vec2(0u), vec2(0u, 1u), v2.x + m4.x < v2.x); + v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v2; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -1598,7 +1598,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1608,14 +1608,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1625,7 +1625,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -1640,7 +1640,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1650,24 +1650,24 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + m1 + select(vec2(0u), vec2(0u, 1u), v0.x + m1.x < v0.x); + v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v15 ^ v0; v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -1682,7 +1682,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1692,14 +1692,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1709,7 +1709,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -1724,7 +1724,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1734,14 +1734,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1751,7 +1751,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -1766,24 +1766,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + m3 + select(vec2(0u), vec2(0u, 1u), v3.x + m3.x < v3.x); + v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v3; v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1793,7 +1793,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -1812,24 +1812,24 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + m2 + select(vec2(0u), vec2(0u, 1u), v0.x + m2.x < v0.x); + v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v12 ^ v0; v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1839,7 +1839,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -1854,7 +1854,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1864,14 +1864,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1881,7 +1881,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -1896,24 +1896,24 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + nonce + select(vec2(0u), vec2(0u, 1u), v2.x + nonce.x < v2.x); + v2 = v2 + nonce + vec2(0u, u32(v2.x + nonce.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v2; v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -1923,7 +1923,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -1938,7 +1938,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -1948,24 +1948,24 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] - v3 = v3 + m3 + select(vec2(0u), vec2(0u, 1u), v3.x + m3.x < v3.x); + v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 16) xor = v15 ^ v3; v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -1980,24 +1980,24 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + m4 + select(vec2(0u), vec2(0u, 1u), v0.x + m4.x < v0.x); + v0 = v0 + m4 + vec2(0u, u32(v0.x + m4.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v0; v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2007,7 +2007,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -2022,7 +2022,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2032,14 +2032,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2049,7 +2049,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -2064,7 +2064,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2074,14 +2074,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2091,7 +2091,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -2106,24 +2106,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + m1 + select(vec2(0u), vec2(0u, 1u), v3.x + m1.x < v3.x); + v3 = v3 + m1 + vec2(0u, u32(v3.x + m1.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v3; v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2133,7 +2133,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -2152,7 +2152,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2162,14 +2162,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2179,7 +2179,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -2194,24 +2194,24 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m1 + select(vec2(0u), vec2(0u, 1u), v1.x + m1.x < v1.x); + v1 = v1 + m1 + vec2(0u, u32(v1.x + m1.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v1; v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2221,7 +2221,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -2236,7 +2236,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2246,14 +2246,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2263,7 +2263,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -2278,24 +2278,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + m4 + select(vec2(0u), vec2(0u, 1u), v3.x + m4.x < v3.x); + v3 = v3 + m4 + vec2(0u, u32(v3.x + m4.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v3; v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2305,7 +2305,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -2320,24 +2320,24 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + nonce + select(vec2(0u), vec2(0u, 1u), v0.x + nonce.x < v0.x); + v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v0; v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2347,7 +2347,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -2362,7 +2362,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2372,24 +2372,24 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m3 + select(vec2(0u), vec2(0u, 1u), v1.x + m3.x < v1.x); + v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v1; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -2404,7 +2404,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2414,24 +2414,24 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m2 + select(vec2(0u), vec2(0u, 1u), v2.x + m2.x < v2.x); + v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v2; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -2446,7 +2446,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2456,14 +2456,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2473,7 +2473,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -2492,7 +2492,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2502,14 +2502,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2519,7 +2519,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -2534,7 +2534,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2544,14 +2544,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2561,7 +2561,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -2576,7 +2576,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2586,24 +2586,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m1 + select(vec2(0u), vec2(0u, 1u), v2.x + m1.x < v2.x); + v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v2; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -2618,24 +2618,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + m3 + select(vec2(0u), vec2(0u, 1u), v3.x + m3.x < v3.x); + v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v3; v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2645,7 +2645,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -2660,7 +2660,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2670,24 +2670,24 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + nonce + select(vec2(0u), vec2(0u, 1u), v0.x + nonce.x < v0.x); + v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v15 ^ v0; v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -2702,7 +2702,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2712,24 +2712,24 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m4 + select(vec2(0u), vec2(0u, 1u), v1.x + m4.x < v1.x); + v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v1; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -2744,7 +2744,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2754,14 +2754,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2771,7 +2771,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -2786,24 +2786,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + m2 + select(vec2(0u), vec2(0u, 1u), v3.x + m2.x < v3.x); + v3 = v3 + m2 + vec2(0u, u32(v3.x + m2.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v3; v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2813,7 +2813,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -2832,7 +2832,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2842,14 +2842,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2859,7 +2859,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -2874,7 +2874,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2884,14 +2884,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2901,7 +2901,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -2916,7 +2916,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -2926,24 +2926,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m3 + select(vec2(0u), vec2(0u, 1u), v2.x + m3.x < v2.x); + v2 = v2 + m3 + vec2(0u, u32(v2.x + m3.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v2; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -2958,24 +2958,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + nonce + select(vec2(0u), vec2(0u, 1u), v3.x + nonce.x < v3.x); + v3 = v3 + nonce + vec2(0u, u32(v3.x + nonce.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v3; v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -2985,7 +2985,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -3000,7 +3000,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3010,24 +3010,24 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + m2 + select(vec2(0u), vec2(0u, 1u), v0.x + m2.x < v0.x); + v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v15 ^ v0; v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -3042,7 +3042,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3052,14 +3052,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3069,7 +3069,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -3084,34 +3084,34 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + m1 + select(vec2(0u), vec2(0u, 1u), v2.x + m1.x < v2.x); + v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v2; v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] - v2 = v2 + m4 + select(vec2(0u), vec2(0u, 1u), v2.x + m4.x < v2.x); + v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v2; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -3126,7 +3126,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3136,14 +3136,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3153,7 +3153,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -3172,7 +3172,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3182,24 +3182,24 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + m2 + select(vec2(0u), vec2(0u, 1u), v0.x + m2.x < v0.x); + v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v0; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -3214,7 +3214,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3224,24 +3224,24 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m4 + select(vec2(0u), vec2(0u, 1u), v1.x + m4.x < v1.x); + v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v1; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -3256,7 +3256,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3266,14 +3266,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3283,7 +3283,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -3298,24 +3298,24 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] - v3 = v3 + m1 + select(vec2(0u), vec2(0u, 1u), v3.x + m1.x < v3.x); + v3 = v3 + m1 + vec2(0u, u32(v3.x + m1.x < v3.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v3; v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3325,7 +3325,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -3340,7 +3340,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3350,14 +3350,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3367,7 +3367,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -3382,7 +3382,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3392,14 +3392,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3409,7 +3409,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -3424,24 +3424,24 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + m3 + select(vec2(0u), vec2(0u, 1u), v2.x + m3.x < v2.x); + v2 = v2 + m3 + vec2(0u, u32(v2.x + m3.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v2; v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3451,7 +3451,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -3466,7 +3466,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3476,24 +3476,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] - v3 = v3 + nonce + select(vec2(0u), vec2(0u, 1u), v3.x + nonce.x < v3.x); + v3 = v3 + nonce + vec2(0u, u32(v3.x + nonce.x < v3.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v3; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -3512,34 +3512,34 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + nonce + select(vec2(0u), vec2(0u, 1u), v0.x + nonce.x < v0.x); + v0 = v0 + nonce + vec2(0u, u32(v0.x + nonce.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v12 ^ v0; v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] - v0 = v0 + m1 + select(vec2(0u), vec2(0u, 1u), v0.x + m1.x < v0.x); + v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v0; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -3554,34 +3554,34 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m2 + select(vec2(0u), vec2(0u, 1u), v1.x + m2.x < v1.x); + v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v1; v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m3 + select(vec2(0u), vec2(0u, 1u), v1.x + m3.x < v1.x); + v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v13 ^ v1; v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -3596,24 +3596,24 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] - v2 = v2 + m4 + select(vec2(0u), vec2(0u, 1u), v2.x + m4.x < v2.x); + v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x)); // d = rotr64(d ^ a, 32) xor = v14 ^ v2; v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3623,7 +3623,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -3638,7 +3638,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3648,14 +3648,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3665,7 +3665,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -3680,7 +3680,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3690,14 +3690,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3707,7 +3707,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -3722,7 +3722,7 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3732,14 +3732,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3749,7 +3749,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -3764,7 +3764,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3774,14 +3774,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3791,7 +3791,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -3806,7 +3806,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3816,14 +3816,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3833,7 +3833,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; @@ -3852,7 +3852,7 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3862,14 +3862,14 @@ fn main(id: vec3) { v12 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v8; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v4 + select(vec2(0u), vec2(0u, 1u), v0.x + v4.x < v0.x); + v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3879,7 +3879,7 @@ fn main(id: vec3) { v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v12 + select(vec2(0u), vec2(0u, 1u), v8.x + v12.x < v8.x); + v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v8; @@ -3894,24 +3894,24 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + m4 + select(vec2(0u), vec2(0u, 1u), v1.x + m4.x < v1.x); + v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v13 ^ v1; v13 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v9; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v5 + select(vec2(0u), vec2(0u, 1u), v1.x + v5.x < v1.x); + v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3921,7 +3921,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v13 + select(vec2(0u), vec2(0u, 1u), v9.x + v13.x < v9.x); + v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v9; @@ -3936,7 +3936,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3946,14 +3946,14 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v10; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v6 + select(vec2(0u), vec2(0u, 1u), v2.x + v6.x < v2.x); + v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -3963,7 +3963,7 @@ fn main(id: vec3) { v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v14 + select(vec2(0u), vec2(0u, 1u), v10.x + v14.x < v10.x); + v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v10; @@ -3978,7 +3978,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -3988,14 +3988,14 @@ fn main(id: vec3) { v15 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v11; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v7 + select(vec2(0u), vec2(0u, 1u), v3.x + v7.x < v3.x); + v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -4005,7 +4005,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v15 + select(vec2(0u), vec2(0u, 1u), v11.x + v15.x < v11.x); + v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v11; @@ -4020,24 +4020,24 @@ fn main(id: vec3) { */ // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+0]] - v0 = v0 + m1 + select(vec2(0u), vec2(0u, 1u), v0.x + m1.x < v0.x); + v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x)); // d = rotr64(d ^ a, 32) xor = v15 ^ v0; v15 = vec2(xor.y, xor.x); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 24) xor = v5 ^ v10; v5 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v0 = v0 + v5 + select(vec2(0u), vec2(0u, 1u), v0.x + v5.x < v0.x); + v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -4047,7 +4047,7 @@ fn main(id: vec3) { v15 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v10 = v10 + v15 + select(vec2(0u), vec2(0u, 1u), v10.x + v15.x < v10.x); + v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x)); // b = rotr64(b ^ c, 63) xor = v5 ^ v10; @@ -4062,34 +4062,34 @@ fn main(id: vec3) { */ // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+0]] - v1 = v1 + nonce + select(vec2(0u), vec2(0u, 1u), v1.x + nonce.x < v1.x); + v1 = v1 + nonce + vec2(0u, u32(v1.x + nonce.x < v1.x)); // d = rotr64(d ^ a, 32) xor = v12 ^ v1; v12 = vec2(xor.y, xor.x); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 24) xor = v6 ^ v11; v6 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v1 = v1 + v6 + select(vec2(0u), vec2(0u, 1u), v1.x + v6.x < v1.x); + v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x)); // a = a + m[sigma[r][2*i+1]] - v1 = v1 + m2 + select(vec2(0u), vec2(0u, 1u), v1.x + m2.x < v1.x); + v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x)); // d = rotr64(d ^ a, 16) xor = v12 ^ v1; v12 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v11 = v11 + v12 + select(vec2(0u), vec2(0u, 1u), v11.x + v12.x < v11.x); + v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x)); // b = rotr64(b ^ c, 63) xor = v6 ^ v11; @@ -4104,7 +4104,7 @@ fn main(id: vec3) { */ // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -4114,14 +4114,14 @@ fn main(id: vec3) { v13 = vec2(xor.y, xor.x); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 24) xor = v7 ^ v8; v7 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v2 = v2 + v7 + select(vec2(0u), vec2(0u, 1u), v2.x + v7.x < v2.x); + v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x)); // a = a + m[sigma[r][2*i+1]] // skip since adding 0u does nothing @@ -4131,7 +4131,7 @@ fn main(id: vec3) { v13 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v8 = v8 + v13 + select(vec2(0u), vec2(0u, 1u), v8.x + v13.x < v8.x); + v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x)); // b = rotr64(b ^ c, 63) xor = v7 ^ v8; @@ -4146,7 +4146,7 @@ fn main(id: vec3) { */ // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+0]] // skip since adding 0u does nothing @@ -4156,24 +4156,24 @@ fn main(id: vec3) { v14 = vec2(xor.y, xor.x); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 24) xor = v4 ^ v9; v4 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); // a = a + b - v3 = v3 + v4 + select(vec2(0u), vec2(0u, 1u), v3.x + v4.x < v3.x); + v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x)); // a = a + m[sigma[r][2*i+1]] - v3 = v3 + m3 + select(vec2(0u), vec2(0u, 1u), v3.x + m3.x < v3.x); + v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x)); // d = rotr64(d ^ a, 16) xor = v14 ^ v3; v14 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u)); // c = c + d - v9 = v9 + v14 + select(vec2(0u), vec2(0u, 1u), v9.x + v14.x < v9.x); + v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x)); // b = rotr64(b ^ c, 63) xor = v4 ^ v9; -- 2.34.1