From: Chris Duncan Date: Sun, 5 Jan 2025 22:40:10 +0000 (-0800) Subject: Inline 64-bit addition to eliminate a function call. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=c2bf49af1a8bc8a5472b395efed25a78e2cd44cc;p=libnemo.git Inline 64-bit addition to eliminate a function call. --- diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index e3a8518..67b5105 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -92,23 +92,44 @@ export class PowGpu extends WorkerInterface { * LSb is the Least-Significant (32) Bits of b * MSb is the Most-Significant (32) Bits of b * If LSb overflows, increment MSb operand + * + * fn add_uint64 (v: ptr>, i: u32, LSb: u32, MSb: u32) { + * var o0: u32 = (*v)[i] + LSb; + * var o1: u32 = (*v)[i+1u] + MSb; + * if ((*v)[i] > 0xFFFFFFFFu - LSb) { + * o1 = o1 + 1u; + * } + * (*v)[i] = o0; + * (*v)[i+1u] = o1; + * } */ - fn add_uint64 (v: ptr>, i: u32, LSb: u32, MSb: u32) { - var o0: u32 = (*v)[i] + LSb; - var o1: u32 = (*v)[i+1u] + MSb; - if ((*v)[i] > 0xFFFFFFFFu - LSb) { - o1 = o1 + 1u; - } - (*v)[i] = o0; - (*v)[i+1u] = o1; - } /** * G Mixing function */ fn G (v: ptr>, m: ptr>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) { - add_uint64(v, a, (*v)[b], (*v)[b+1u]); - add_uint64(v, a, (*m)[ix], (*m)[ix+1u]); + var o0: u32; + var o1: u32; + + // add_uint64(v, a, (*v)[b], (*v)[b+1u]); + o0 = (*v)[a] + (*v)[b]; + o1 = (*v)[a+1u] + (*v)[b+1u]; + if ((*v)[a] > 0xFFFFFFFFu - (*v)[b]) { + o1 = o1 + 1u; + } + (*v)[a] = o0; + (*v)[a+1u] = o1; + + // add_uint64(v, a, (*m)[ix], (*m)[ix+1u]); + o0 = (*v)[a] + (*m)[ix]; + o1 = (*v)[a+1u] + (*m)[ix+1u]; + if ((*v)[a] > 0xFFFFFFFFu - (*m)[ix]) { + o1 = o1 + 1u; + } + (*v)[a] = o0; + (*v)[a+1u] = o1; + + // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits var xor0: u32 = (*v)[d] ^ (*v)[a]; @@ -116,7 +137,18 @@ export class PowGpu extends WorkerInterface { (*v)[d] = xor1; (*v)[d+1u] = xor0; - add_uint64(v, c, (*v)[d], (*v)[d+1u]); + + + // add_uint64(v, c, (*v)[d], (*v)[d+1u]); + o0 = (*v)[c] + (*v)[d]; + o1 = (*v)[c+1u] + (*v)[d+1u]; + if ((*v)[c] > 0xFFFFFFFFu - (*v)[d]) { + o1 = o1 + 1u; + } + (*v)[c] = o0; + (*v)[c+1u] = o1; + + // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits xor0 = (*v)[b] ^ (*v)[c]; @@ -124,8 +156,27 @@ export class PowGpu extends WorkerInterface { (*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u); (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u); - add_uint64(v, a, (*v)[b], (*v)[b+1u]); - add_uint64(v, a, (*m)[iy], (*m)[iy+1u]); + + + // add_uint64(v, a, (*v)[b], (*v)[b+1u]); + o0 = (*v)[a] + (*v)[b]; + o1 = (*v)[a+1u] + (*v)[b+1u]; + if ((*v)[a] > 0xFFFFFFFFu - (*v)[b]) { + o1 = o1 + 1u; + } + (*v)[a] = o0; + (*v)[a+1u] = o1; + + // add_uint64(v, a, (*m)[iy], (*m)[iy+1u]); + o0 = (*v)[a] + (*m)[iy]; + o1 = (*v)[a+1u] + (*m)[iy+1u]; + if ((*v)[a] > 0xFFFFFFFFu - (*m)[iy]) { + o1 = o1 + 1u; + } + (*v)[a] = o0; + (*v)[a+1u] = o1; + + // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits xor0 = (*v)[d] ^ (*v)[a]; @@ -133,7 +184,18 @@ export class PowGpu extends WorkerInterface { (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u); (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u); - add_uint64(v, c, (*v)[d], (*v)[d+1u]); + + + // add_uint64(v, c, (*v)[d], (*v)[d+1u]); + o0 = (*v)[c] + (*v)[d]; + o1 = (*v)[c+1u] + (*v)[d+1u]; + if ((*v)[c] > 0xFFFFFFFFu - (*v)[d]) { + o1 = o1 + 1u; + } + (*v)[c] = o0; + (*v)[c+1u] = o1; + + // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits xor0 = (*v)[b] ^ (*v)[c];