*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+0]]
o0 = v0 + m2;
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+0]]
o0 = v0 + m4;
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+1]]
o0 = v0 + m2;
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+0]]
o0 = v0 + m8;
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+0]]
o0 = v0 + m0;
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+1]]
o0 = v0 + m0;
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+1]]
o0 = v0 + m4;
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+0]]
// // skip since adding 0u does nothing
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing
*/
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// a = a + m[sigma[r][2*i+0]]
o0 = v0 + m2;
v11 = (xor1 >> 24u) ^ (xor0 << 8u);
// a = a + b
- o0 = v0 + v10;
- o1 = v1 + v11;
- o1 = o1 + select(0u, 1u, o0 < v0);
- v0 = o0;
- v1 = o1;
+ v_01.x = v0;
+ v_01.y = v1;
+ v_1011.x = v10;
+ v_1011.y = v11;
+ v_01 = v_01 + v_1011 + select(vec2<u32>(0u), vec2<u32>(0u, 1u), v_01.x + v_1011.x < v_01.x);
+ v0 = v_01.x;
+ v1 = v_01.y;
+ v10 = v_1011.x;
+ v11 = v_1011.y;
// // a = a + m[sigma[r][2*i+1]]
// // skip since adding 0u does nothing