*/
// Each 64-bit word is held as a vec2<u32> with .x = low word and .y = high word.
// A 64-bit add is done component-wise; select() adds 1 to the high word when
// the low-word sum wraps around.

// a = a + b
v_01 = v_01 + v_89 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_89.x < v_01.x);

// a = a + m[sigma[r][2*i+0]]
v_01 = v_01 + vec2(m0, m1) + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + m0 < v_01.x);

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
// NOTE(review): d is read from v_2425 here, as the original statements did;
// confirm v_2425 holds the current d on entry to this section.
xor = v_2425 ^ v_01;
v_2425 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v24 = v_2425.x;
v25 = v_2425.y;

// c = c + d
v_1617 = v_1617 + v_2425 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1617.x + v_2425.x < v_1617.x);

// b = rotr64(b ^ c, 24)
xor = v_89 ^ v_1617;
v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_01 = v_01 + v_89 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_89.x < v_01.x);

// a = a + m[sigma[r][2*i+1]]
v_01 = v_01 + vec2(m2, m3) + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + m2 < v_01.x);

// d = rotr64(d ^ a, 16)
v_2425.x = v24;
v_2425.y = v25;
xor = v_2425 ^ v_01;
v_2425 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v24 = v_2425.x;
v25 = v_2425.y;

// c = c + d
v_1617 = v_1617 + v_2425 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1617.x + v_2425.x < v_1617.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_89 ^ v_1617;
v_89 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=1(x2), a=v[2-3], b=v[10-11], c=v[18-19], d=v[26-27]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_23 = v_23 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_23.x + v_1011.x < v_23.x);

// a = a + m[sigma[r][2*i+0]]
v_23 = v_23 + vec2(m4, m5) + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_23.x + m4 < v_23.x);

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_2627.x = v26;
v_2627.y = v27;
xor = v_2627 ^ v_23;
v_2627 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v26 = v_2627.x;
v27 = v_2627.y;

// c = c + d
v_1819 = v_1819 + v_2627 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1819.x + v_2627.x < v_1819.x);

// b = rotr64(b ^ c, 24)
xor = v_1011 ^ v_1819;
v_1011 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_23 = v_23 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_23.x + v_1011.x < v_23.x);

// a = a + m[sigma[r][2*i+1]]
v_23 = v_23 + vec2(m6, m7) + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_23.x + m6 < v_23.x);

// d = rotr64(d ^ a, 16)
v_2627.x = v26;
v_2627.y = v27;
xor = v_2627 ^ v_23;
v_2627 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v26 = v_2627.x;
v27 = v_2627.y;

// c = c + d
v_1819 = v_1819 + v_2627 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1819.x + v_2627.x < v_1819.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_1011 ^ v_1819;
v_1011 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=2(x2), a=v[4-5], b=v[12-13], c=v[20-21], d=v[28-29]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_45 = v_45 + v_1213 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_45.x + v_1213.x < v_45.x);

// a = a + m[sigma[r][2*i+0]]
v_45 = v_45 + vec2(m8, m9) + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_45.x + m8 < v_45.x);

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_2829.x = v28;
v_2829.y = v29;
xor = v_2829 ^ v_45;
v_2829 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v28 = v_2829.x;
v29 = v_2829.y;

// c = c + d
v_2021 = v_2021 + v_2829 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2021.x + v_2829.x < v_2021.x);

// b = rotr64(b ^ c, 24)
xor = v_1213 ^ v_2021;
v_1213 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_45 = v_45 + v_1213 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_45.x + v_1213.x < v_45.x);

// a = a + m[sigma[r][2*i+1]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 16)
v_2829.x = v28;
v_2829.y = v29;
xor = v_2829 ^ v_45;
v_2829 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v28 = v_2829.x;
v29 = v_2829.y;

// c = c + d
v_2021 = v_2021 + v_2829 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2021.x + v_2829.x < v_2021.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_1213 ^ v_2021;
v_1213 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=3(x2), a=v[6-7], b=v[14-15], c=v[22-23], d=v[30-31]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_67 = v_67 + v_1415 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_67.x + v_1415.x < v_67.x);

// a = a + m[sigma[r][2*i+0]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_3031.x = v30;
v_3031.y = v31;
xor = v_3031 ^ v_67;
v_3031 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v30 = v_3031.x;
v31 = v_3031.y;

// c = c + d
v_2223 = v_2223 + v_3031 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2223.x + v_3031.x < v_2223.x);

// b = rotr64(b ^ c, 24)
xor = v_1415 ^ v_2223;
v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_67 = v_67 + v_1415 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_67.x + v_1415.x < v_67.x);

// a = a + m[sigma[r][2*i+1]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 16)
v_3031.x = v30;
v_3031.y = v31;
xor = v_3031 ^ v_67;
v_3031 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v30 = v_3031.x;
v31 = v_3031.y;

// c = c + d
v_2223 = v_2223 + v_3031 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2223.x + v_3031.x < v_2223.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_1415 ^ v_2223;
v_1415 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=4(x2), a=v[0-1], b=v[10-11], c=v[20-21], d=v[30-31]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);

// a = a + m[sigma[r][2*i+0]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_3031.x = v30;
v_3031.y = v31;
xor = v_3031 ^ v_01;
v_3031 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v30 = v_3031.x;
v31 = v_3031.y;

// c = c + d
v_2021 = v_2021 + v_3031 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2021.x + v_3031.x < v_2021.x);

// b = rotr64(b ^ c, 24)
xor = v_1011 ^ v_2021;
v_1011 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);

// a = a + m[sigma[r][2*i+1]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 16)
v_3031.x = v30;
v_3031.y = v31;
xor = v_3031 ^ v_01;
v_3031 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v30 = v_3031.x;
v31 = v_3031.y;

// c = c + d
v_2021 = v_2021 + v_3031 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2021.x + v_3031.x < v_2021.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_1011 ^ v_2021;
v_1011 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=5(x2), a=v[2-3], b=v[12-13], c=v[22-23], d=v[24-25]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_23 = v_23 + v_1213 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_23.x + v_1213.x < v_23.x);

// a = a + m[sigma[r][2*i+0]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_2425.x = v24;
v_2425.y = v25;
xor = v_2425 ^ v_23;
v_2425 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v24 = v_2425.x;
v25 = v_2425.y;

// c = c + d
v_2223 = v_2223 + v_2425 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2223.x + v_2425.x < v_2223.x);

// b = rotr64(b ^ c, 24)
xor = v_1213 ^ v_2223;
v_1213 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_23 = v_23 + v_1213 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_23.x + v_1213.x < v_23.x);

// a = a + m[sigma[r][2*i+1]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 16)
v_2425.x = v24;
v_2425.y = v25;
xor = v_2425 ^ v_23;
v_2425 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v24 = v_2425.x;
v25 = v_2425.y;

// c = c + d
v_2223 = v_2223 + v_2425 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_2223.x + v_2425.x < v_2223.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_1213 ^ v_2223;
v_1213 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=6(x2), a=v[4-5], b=v[14-15], c=v[16-17], d=v[26-27]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_45 = v_45 + v_1415 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_45.x + v_1415.x < v_45.x);

// a = a + m[sigma[r][2*i+0]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_2627.x = v26;
v_2627.y = v27;
xor = v_2627 ^ v_45;
v_2627 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v26 = v_2627.x;
v27 = v_2627.y;

// c = c + d
v_1617 = v_1617 + v_2627 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1617.x + v_2627.x < v_1617.x);

// b = rotr64(b ^ c, 24)
xor = v_1415 ^ v_1617;
v_1415 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_45 = v_45 + v_1415 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_45.x + v_1415.x < v_45.x);

// a = a + m[sigma[r][2*i+1]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 16)
v_2627.x = v26;
v_2627.y = v27;
xor = v_2627 ^ v_45;
v_2627 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v26 = v_2627.x;
v27 = v_2627.y;

// c = c + d
v_1617 = v_1617 + v_2627 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1617.x + v_2627.x < v_1617.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_1415 ^ v_1617;
v_1415 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/**
* r=0, i=7(x2), a=v[6-7], b=v[8-9], c=v[18-19], d=v[28-29]
*
* Each 64-bit word is a vec2<u32> with .x = low word and .y = high word.
* select() adds 1 to the high word when the low-word sum wraps around.
*/

// a = a + b
v_67 = v_67 + v_89 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_67.x + v_89.x < v_67.x);

// a = a + m[sigma[r][2*i+0]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 32)
// Rotating a 64-bit value by 32 swaps its low and high words.
v_2829.x = v28;
v_2829.y = v29;
xor = v_2829 ^ v_67;
v_2829 = vec2(xor.y, xor.x);
// Keep the scalar mirrors of d in step with the vector.
v28 = v_2829.x;
v29 = v_2829.y;

// c = c + d
v_1819 = v_1819 + v_2829 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1819.x + v_2829.x < v_1819.x);

// b = rotr64(b ^ c, 24)
xor = v_89 ^ v_1819;
v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u));

// a = a + b
v_67 = v_67 + v_89 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_67.x + v_89.x < v_67.x);

// a = a + m[sigma[r][2*i+1]]
// skip since adding 0u does nothing

// d = rotr64(d ^ a, 16)
v_2829.x = v28;
v_2829.y = v29;
xor = v_2829 ^ v_67;
v_2829 = vec2((xor.x >> 16u) | (xor.y << 16u), (xor.y >> 16u) | (xor.x << 16u));
v28 = v_2829.x;
v29 = v_2829.y;

// c = c + d
v_1819 = v_1819 + v_2829 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_1819.x + v_2829.x < v_1819.x);

// b = rotr64(b ^ c, 63)
// A right rotation by 63 is a left rotation by 1.
xor = v_89 ^ v_1819;
v_89 = vec2((xor.x << 1u) | (xor.y >> 31u), (xor.y << 1u) | (xor.x >> 31u));

/****************************************************************************
* ROUND(1) *
****************************************************************************/