28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
);
-// 64-bit unsigned addition within the compression buffer
-// Sets v[a,a+1] += b
-// b0 is the low 32 bits of b, b1 represents the high 32 bits
-void add_uint64 (uint a, uint b0, uint b1) {
- uint o0 = v[a] + b0;
- uint o1 = v[a+1u] + b1;
- if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow?
- o1++;
+// G mixing function
+void G (uint ix, uint iy, uint a, uint b, uint c, uint d) {
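+ // ix and iy are pair indices into m for the two message words mixed
+ // by this call (taken from SIGMA82); a, b, c, d are pair indices into
+ // v. Each 64-bit word is emulated as two consecutive 32-bit uints
+ // (low word at n, high word at n+1u), since WebGL's GLSL exposes no
+ // 64-bit integer type.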
+ uint o0;
+ uint o1;
+ uint xor0;
+ uint xor1;
+
+ // a = a + b;
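+ // 64-bit addition as two 32-bit adds: the low half overflowed iff
+ // the pre-add low word exceeds 0xFFFFFFFFu minus the addend, in
+ // which case a carry is propagated into the high word. The same
+ // pattern repeats for every addition below.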
+ o0 = v[a] + v[b];
+ o1 = v[a+1u] + v[b+1u];
+ if (v[a] > 0xFFFFFFFFu - v[b]) {
+ o1 = o1 + 1u;
}
v[a] = o0;
v[a+1u] = o1;
-}
-// G Mixing function
-void B2B_G (uint a, uint b, uint c, uint d, uint ix, uint iy) {
- add_uint64(a, v[b], v[b+1u]);
- add_uint64(a, m[ix], m[ix+1u]);
+ // a = a + m[sigma[r][2*i+0]];
+ o0 = v[a] + m[ix];
+ o1 = v[a+1u] + m[ix+1u];
+ if (v[a] > 0xFFFFFFFFu - m[ix]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
- // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
- uint xor0 = v[d] ^ v[a];
- uint xor1 = v[d+1u] ^ v[a+1u];
+ // d = rotr64(d ^ a, 32);
+ xor0 = v[d] ^ v[a];
+ xor1 = v[d+1u] ^ v[a+1u];
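+ // A 64-bit rotation by 32 bits simply swaps the two 32-bit halves.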
v[d] = xor1;
v[d+1u] = xor0;
- add_uint64(c, v[d], v[d+1u]);
+ // c = c + d;
+ o0 = v[c] + v[d];
+ o1 = v[c+1u] + v[d+1u];
+ if (v[c] > 0xFFFFFFFFu - v[d]) {
+ o1 = o1 + 1u;
+ }
+ v[c] = o0;
+ v[c+1u] = o1;
- // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
+ // b = rotr64(b ^ c, 24);
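+ // rotr64 by k (0 < k < 32) on a 32-bit pair: each output word keeps
+ // its own bits shifted right by k and pulls the partner's low k bits
+ // into its top; XOR is equivalent to OR here because the shifted-in
+ // bits are zero.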
xor0 = v[b] ^ v[c];
xor1 = v[b+1u] ^ v[c+1u];
v[b] = (xor0 >> 24u) ^ (xor1 << 8u);
v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
- add_uint64(a, v[b], v[b+1u]);
- add_uint64(a, m[iy], m[iy+1u]);
+ // a = a + b;
+ o0 = v[a] + v[b];
+ o1 = v[a+1u] + v[b+1u];
+ if (v[a] > 0xFFFFFFFFu - v[b]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
- // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
+ // a = a + m[sigma[r][2*i+1]];
+ o0 = v[a] + m[iy];
+ o1 = v[a+1u] + m[iy+1u];
+ if (v[a] > 0xFFFFFFFFu - m[iy]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
+
+ // d = rotr64(d ^ a, 16);
xor0 = v[d] ^ v[a];
xor1 = v[d+1u] ^ v[a+1u];
v[d] = (xor0 >> 16u) ^ (xor1 << 16u);
v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
- add_uint64(c, v[d], v[d+1u]);
+ // c = c + d;
+ o0 = v[c] + v[d];
+ o1 = v[c+1u] + v[d+1u];
+ if (v[c] > 0xFFFFFFFFu - v[d]) {
+ o1 = o1 + 1u;
+ }
+ v[c] = o0;
+ v[c+1u] = o1;
- // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
+ // b = rotr64(b ^ c, 63);
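+ // rotr64 by 63 is rotl64 by 1, so the shifts run the other way:
+ // each word shifts left by one and takes the partner's top bit.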
xor0 = v[b] ^ v[c];
xor1 = v[b+1u] ^ v[c+1u];
v[b] = (xor1 >> 31u) ^ (xor0 << 1u);
v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
}
// twelve rounds of mixing
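+// Each round mixes the four columns of the 4x4 state (first four G
+// calls), then the four diagonals (last four). SIGMA82 stores the
+// message schedule flattened to 16 pair indices per round.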
for(uint i = 0u; i < 12u; i = i + 1u) {
- B2B_G(0u, 8u, 16u, 24u, SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u]);
- B2B_G(2u, 10u, 18u, 26u, SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u]);
- B2B_G(4u, 12u, 20u, 28u, SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u]);
- B2B_G(6u, 14u, 22u, 30u, SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u]);
- B2B_G(0u, 10u, 20u, 30u, SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u]);
- B2B_G(2u, 12u, 22u, 24u, SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u]);
- B2B_G(4u, 14u, 16u, 26u, SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u]);
- B2B_G(6u, 8u, 18u, 28u, SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u]);
+ G(SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u], 0u, 8u, 16u, 24u);
+ G(SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u], 2u, 10u, 18u, 26u);
+ G(SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u], 4u, 12u, 20u, 28u);
+ G(SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u], 6u, 14u, 22u, 30u);
+ G(SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u], 0u, 10u, 20u, 30u);
+ G(SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u], 2u, 12u, 22u, 24u);
+ G(SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u], 4u, 14u, 16u, 26u);
+ G(SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u], 6u, 8u, 18u, 28u);
}
// Pixel data is multiplied by the threshold test result (0 or 1)