From c6b7473c5104113e356293f7b8300e22176bce14 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Mon, 6 Jan 2025 11:24:30 -0800 Subject: [PATCH] Convert first of four v accessors to pointer references. --- src/lib/workers/powgpu.ts | 242 +++++++++++++++++++------------------- 1 file changed, 121 insertions(+), 121 deletions(-) diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index 8a781ec..c6d30b9 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -90,33 +90,33 @@ export class PowGpu extends WorkerInterface { /** * G Mixing function */ - fn G (v: ptr>, a: u32, b: u32, c: u32, d: u32, mx0: u32, mx1: u32, my0: u32, my1: u32) { + fn G (v: ptr>, va0: ptr, va1: ptr, b: u32, c: u32, d: u32, mx0: u32, mx1: u32, my0: u32, my1: u32) { var o0: u32; var o1: u32; var xor0: u32; var xor1: u32; // a = a + b; - o0 = (*v)[a] + (*v)[b]; - o1 = (*v)[a+1u] + (*v)[b+1u]; - if ((*v)[a] > 0xFFFFFFFFu - (*v)[b]) { + o0 = *va0 + (*v)[b]; + o1 = *va1 + (*v)[b+1u]; + if (*va0 > 0xFFFFFFFFu - (*v)[b]) { o1 = o1 + 1u; } - (*v)[a] = o0; - (*v)[a+1u] = o1; + *va0 = o0; + *va1 = o1; // a = a + m[sigma[r][2*i+0]]; - o0 = (*v)[a] + mx0; - o1 = (*v)[a+1u] + mx1; - if ((*v)[a] > 0xFFFFFFFFu - mx0) { + o0 = *va0 + mx0; + o1 = *va1 + mx1; + if (*va0 > 0xFFFFFFFFu - mx0) { o1 = o1 + 1u; } - (*v)[a] = o0; - (*v)[a+1u] = o1; + *va0 = o0; + *va1 = o1; // d = rotr64(d ^ a, 32); - xor0 = (*v)[d] ^ (*v)[a]; - xor1 = (*v)[d+1u] ^ (*v)[a+1u]; + xor0 = (*v)[d] ^ *va0; + xor1 = (*v)[d+1u] ^ *va1; (*v)[d] = xor1; (*v)[d+1u] = xor0; @@ -136,26 +136,26 @@ export class PowGpu extends WorkerInterface { (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u); // a = a + b; - o0 = (*v)[a] + (*v)[b]; - o1 = (*v)[a+1u] + (*v)[b+1u]; - if ((*v)[a] > 0xFFFFFFFFu - (*v)[b]) { + o0 = *va0 + (*v)[b]; + o1 = *va1 + (*v)[b+1u]; + if (*va0 > 0xFFFFFFFFu - (*v)[b]) { o1 = o1 + 1u; } - (*v)[a] = o0; - (*v)[a+1u] = o1; + *va0 = o0; + *va1 = o1; // a = a + m[sigma[r][2*i+1]]; - o0 = (*v)[a] + my0; - o1 = (*v)[a+1u] + my1; - if ((*v)[a] > 0xFFFFFFFFu - my0) { + o0 = *va0 + my0; + o1 = *va1 + my1; + if (*va0 > 0xFFFFFFFFu - my0) { o1 = o1 + 1u; } - (*v)[a] = o0; - (*v)[a+1u] = o1; + *va0 = o0; + *va1 = o1; // d = rotr64(d ^ a, 16) - xor0 = (*v)[d] ^ (*v)[a]; - xor1 = (*v)[d+1u] ^ (*v)[a+1u]; + xor0 = (*v)[d] ^ *va0; + xor1 = (*v)[d+1u] ^ *va1; (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u); (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u); @@ -270,124 +270,124 @@ export class PowGpu extends WorkerInterface { * Twelve rounds of mixing as part of BLAKE2b compression step */ // ROUND(0) - G(&v, 0u, 8u, 16u, 24u, m0, m1, m2, m3); - G(&v, 2u, 10u, 18u, 26u, m4, m5, m6, m7); - G(&v, 4u, 12u, 20u, 28u, m8, m9, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, m0, m1, m2, m3); + G(&v, &v2, &v3, 10u, 18u, 26u, m4, m5, m6, m7); + G(&v, &v4, &v5, 12u, 20u, 28u, m8, m9, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(1) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m8, m9, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m2, m3, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, m0, m1, m4, m5); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m6, m7); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, m8, m9, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, m2, m3, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, m0, m1, m4, m5); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, m6, m7); // ROUND(2) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, m0, m1); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m4, m5); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, m6, m7, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, m2, m3); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m8, m9); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, 0u, 0u, m0, m1); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, m4, m5); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, m6, m7, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, m2, m3); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, m8, m9); // ROUND(3) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m6, m7, m2, m3); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m4, m5, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, m8, m9, m0, m1); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, m6, m7, m2, m3); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, m4, m5, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, m8, m9, m0, m1); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(4) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, m0, m1); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, m4, m5, m8, m9); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m2, m3); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, m6, m7, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, m0, m1); + G(&v, &v2, &v3, 10u, 18u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, m4, m5, m8, m9); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, m2, m3); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, m6, m7, 0u, 0u); // ROUND(5) - G(&v, 0u, 8u, 16u, 24u, m4, m5, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, m0, m1, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, m6, m7); - G(&v, 0u, 10u, 20u, 30u, m8, m9, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, m2, m3, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, m4, m5, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, m0, m1, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, m6, m7); + G(&v, &v0, &v1, 10u, 20u, 30u, m8, m9, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, m2, m3, 0u, 0u); // ROUND(6) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m2, m3, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, m8, m9, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m0, m1, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, m6, m7); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, m4, m5); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, m2, m3, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, m8, m9, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, m0, m1, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, m6, m7); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, m4, m5); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(7) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m2, m3); - G(&v, 6u, 14u, 22u, 30u, m6, m7, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m0, m1); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, m8, m9); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, m4, m5, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, m2, m3); + G(&v, &v6, &v7, 14u, 22u, 30u, m6, m7, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, m0, m1); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, m8, m9); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, m4, m5, 0u, 0u); // ROUND(8) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m6, m7); - G(&v, 6u, 14u, 22u, 30u, m0, m1, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m4, m5); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, m2, m3, m8, m9); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, m6, m7); + G(&v, &v6, &v7, 14u, 22u, 30u, m0, m1, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, m4, m5); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, m2, m3, m8, m9); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(9) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, m4, m5); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, m8, m9); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, m2, m3, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, m6, m7, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m0, m1); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, m4, m5); + G(&v, &v2, &v3, 10u, 18u, 26u, 0u, 0u, m8, m9); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, m2, m3, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, m6, m7, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, m0, m1); // ROUND(10) - G(&v, 0u, 8u, 16u, 24u, m0, m1, m2, m3); - G(&v, 2u, 10u, 18u, 26u, m4, m5, m6, m7); - G(&v, 4u, 12u, 20u, 28u, m8, m9, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 8u, 16u, 24u, m0, m1, m2, m3); + G(&v, &v2, &v3, 10u, 18u, 26u, m4, m5, m6, m7); + G(&v, &v4, &v5, 12u, 20u, 28u, m8, m9, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(11) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m8, m9, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m2, m3, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, m0, m1, m4, m5); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m6, m7); + G(&v, &v0, &v1, 8u, 16u, 24u, 0u, 0u, 0u, 0u); + G(&v, &v2, &v3, 10u, 18u, 26u, m8, m9, 0u, 0u); + G(&v, &v4, &v5, 12u, 20u, 28u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 14u, 22u, 30u, 0u, 0u, 0u, 0u); + G(&v, &v0, &v1, 10u, 20u, 30u, m2, m3, 0u, 0u); + G(&v, &v2, &v3, 12u, 22u, 24u, m0, m1, m4, m5); + G(&v, &v4, &v5, 14u, 16u, 26u, 0u, 0u, 0u, 0u); + G(&v, &v6, &v7, 8u, 18u, 28u, 0u, 0u, m6, m7); /** * Set nonce if it passes the threshold and no other thread has set it -- 2.34.1