zoso.dev Git - libnemo.git/commitdiff
Inline 64-bit addition to eliminate a function call.
author Chris Duncan <chris@zoso.dev>
Sun, 5 Jan 2025 22:40:10 +0000 (14:40 -0800)
committer Chris Duncan <chris@zoso.dev>
Sun, 5 Jan 2025 22:40:10 +0000 (14:40 -0800)
src/lib/workers/powgpu.ts

index e3a851802d38a4ee17093f7dd4b4fc6acc901a92..67b5105af7785e3328e089842a8f77dc7b938013 100644 (file)
@@ -92,23 +92,44 @@ export class PowGpu extends WorkerInterface {
                * LSb is the Least-Significant (32) Bits of b
                * MSb is the Most-Significant (32) Bits of b
                * If LSb overflows, increment MSb operand
+               *
+               *       fn add_uint64 (v: ptr<function, array<u32, 32>>, i: u32, LSb: u32, MSb: u32) {
+               *               var o0: u32 = (*v)[i] + LSb;
+               *               var o1: u32 = (*v)[i+1u] + MSb;
+               *               if ((*v)[i] > 0xFFFFFFFFu - LSb) {
+               *                       o1 = o1 + 1u;
+               *               }
+               *               (*v)[i] = o0;
+               *               (*v)[i+1u] = o1;
+               *       }
                */
-               fn add_uint64 (v: ptr<function, array<u32, 32>>, i: u32, LSb: u32, MSb: u32) {
-                       var o0: u32 = (*v)[i] + LSb;
-                       var o1: u32 = (*v)[i+1u] + MSb;
-                       if ((*v)[i] > 0xFFFFFFFFu - LSb) {
-                               o1 = o1 + 1u;
-                       }
-                       (*v)[i] = o0;
-                       (*v)[i+1u] = o1;
-               }
 
                /**
                * G Mixing function
                */
                fn G (v: ptr<function, array<u32, 32>>, m: ptr<function, array<u32, 16>>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) {
-                       add_uint64(v, a, (*v)[b], (*v)[b+1u]);
-                       add_uint64(v, a, (*m)[ix], (*m)[ix+1u]);
+                       var o0: u32;
+                       var o1: u32;
+
+                       // add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+                       o0 = (*v)[a] + (*v)[b];
+                       o1 = (*v)[a+1u] + (*v)[b+1u];
+                       if ((*v)[a] > 0xFFFFFFFFu - (*v)[b]) {
+                               o1 = o1 + 1u;
+                       }
+                       (*v)[a] = o0;
+                       (*v)[a+1u] = o1;
+
+                       // add_uint64(v, a, (*m)[ix], (*m)[ix+1u]);
+                       o0 = (*v)[a] + (*m)[ix];
+                       o1 = (*v)[a+1u] + (*m)[ix+1u];
+                       if ((*v)[a] > 0xFFFFFFFFu - (*m)[ix]) {
+                               o1 = o1 + 1u;
+                       }
+                       (*v)[a] = o0;
+                       (*v)[a+1u] = o1;
+
+
 
                        // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
                        var xor0: u32 = (*v)[d] ^ (*v)[a];
@@ -116,7 +137,18 @@ export class PowGpu extends WorkerInterface {
                        (*v)[d] = xor1;
                        (*v)[d+1u] = xor0;
 
-                       add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+
+                       // add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+                       o0 = (*v)[c] + (*v)[d];
+                       o1 = (*v)[c+1u] + (*v)[d+1u];
+                       if ((*v)[c] > 0xFFFFFFFFu - (*v)[d]) {
+                               o1 = o1 + 1u;
+                       }
+                       (*v)[c] = o0;
+                       (*v)[c+1u] = o1;
+
+
 
                        // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
                        xor0 = (*v)[b] ^ (*v)[c];
@@ -124,8 +156,27 @@ export class PowGpu extends WorkerInterface {
                        (*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u);
                        (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
 
-                       add_uint64(v, a, (*v)[b], (*v)[b+1u]);
-                       add_uint64(v, a, (*m)[iy], (*m)[iy+1u]);
+
+
+                       // add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+                       o0 = (*v)[a] + (*v)[b];
+                       o1 = (*v)[a+1u] + (*v)[b+1u];
+                       if ((*v)[a] > 0xFFFFFFFFu - (*v)[b]) {
+                               o1 = o1 + 1u;
+                       }
+                       (*v)[a] = o0;
+                       (*v)[a+1u] = o1;
+
+                       // add_uint64(v, a, (*m)[iy], (*m)[iy+1u]);
+                       o0 = (*v)[a] + (*m)[iy];
+                       o1 = (*v)[a+1u] + (*m)[iy+1u];
+                       if ((*v)[a] > 0xFFFFFFFFu - (*m)[iy]) {
+                               o1 = o1 + 1u;
+                       }
+                       (*v)[a] = o0;
+                       (*v)[a+1u] = o1;
+
+
 
                        // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
                        xor0 = (*v)[d] ^ (*v)[a];
@@ -133,7 +184,18 @@ export class PowGpu extends WorkerInterface {
                        (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u);
                        (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
 
-                       add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+
+                       // add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+                       o0 = (*v)[c] + (*v)[d];
+                       o1 = (*v)[c+1u] + (*v)[d+1u];
+                       if ((*v)[c] > 0xFFFFFFFFu - (*v)[d]) {
+                               o1 = o1 + 1u;
+                       }
+                       (*v)[c] = o0;
+                       (*v)[c+1u] = o1;
+
+
 
                        // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
                        xor0 = (*v)[b] ^ (*v)[c];