From 3a38b29eef2dc0a631f2d0d1837281e926401d0b Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Mon, 6 Jan 2025 08:31:20 -0800 Subject: [PATCH] Eliminate m array. Fix nonce construction. --- src/lib/workers/powgpu.ts | 118 +++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 65 deletions(-) diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index 7ed21b1..ab2879a 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -202,20 +202,8 @@ export class PowGpu extends WorkerInterface { * Reference implementation uses uint64_t whereas we have u32 * Array length increased from original 16 to 32 to compensate */ - var m: array; - m[0u] = ubo.random; - m[1u] = ubo.random ^ id; - m[2u] = ubo.blockhash[0u].x; - m[3u] = ubo.blockhash[0u].y; - m[4u] = ubo.blockhash[0u].z; - m[5u] = ubo.blockhash[0u].w; - m[6u] = ubo.blockhash[1u].x; - m[7u] = ubo.blockhash[1u].y; - m[8u] = ubo.blockhash[1u].z; - m[9u] = ubo.blockhash[1u].w; - var m0: u32 = ubo.random; - var m1: u32 = ubo.random & id; + var m1: u32 = ubo.random ^ id; var m2: u32 = ubo.blockhash[0u].x; var m3: u32 = ubo.blockhash[0u].y; var m4: u32 = ubo.blockhash[0u].z; @@ -284,9 +272,9 @@ export class PowGpu extends WorkerInterface { * Twelve rounds of mixing as part of BLAKE2b compression step */ // ROUND(0) - G(&v, 0u, 8u, 16u, 24u, m[0u], m[1u], m[2u], m[3u]); - G(&v, 2u, 10u, 18u, 26u, m[4u], m[5u], m[6u], m[7u]); - G(&v, 4u, 12u, 20u, 28u, m[8u], m[9u], 0u, 0u); + G(&v, 0u, 8u, 16u, 24u, m0, m1, m2, m3); + G(&v, 2u, 10u, 18u, 26u, m4, m5, m6, m7); + G(&v, 4u, 12u, 20u, 28u, m8, m9, 0u, 0u); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); @@ -295,98 +283,98 @@ export class PowGpu extends WorkerInterface { // ROUND(1) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m[8u], m[9u], 0u, 0u); + G(&v, 2u, 10u, 18u, 26u, m8, m9, 0u, 0u); G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m[2u], m[3u], 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, m[0u], m[1u], m[4u], m[5u]); + G(&v, 0u, 10u, 20u, 30u, m2, m3, 0u, 0u); + G(&v, 2u, 12u, 22u, 24u, m0, m1, m4, m5); G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m[6u], m[7u]); + G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m6, m7); // ROUND(2) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, m[0u], m[1u]); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m[4u], m[5u]); + G(&v, 2u, 10u, 18u, 26u, 0u, 0u, m0, m1); + G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m4, m5); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, m[6u], m[7u], 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, m[2u], m[3u]); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m[8u], m[9u]); + G(&v, 2u, 12u, 22u, 24u, m6, m7, 0u, 0u); + G(&v, 4u, 14u, 16u, 26u, 0u, 0u, m2, m3); + G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m8, m9); // ROUND(3) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m[6u], m[7u], m[2u], m[3u]); + G(&v, 2u, 10u, 18u, 26u, m6, m7, m2, m3); G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m[4u], m[5u], 0u, 0u); + G(&v, 0u, 10u, 20u, 30u, m4, m5, 0u, 0u); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, m[8u], m[9u], m[0u], m[1u]); + G(&v, 4u, 14u, 16u, 26u, m8, m9, m0, m1); G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(4) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, m[0u], m[1u]); + G(&v, 0u, 8u, 16u, 24u, 0u, 0u, m0, m1); G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, m[4u], m[5u], m[8u], m[9u]); + G(&v, 4u, 12u, 20u, 28u, m4, m5, m8, m9); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m[2u], m[3u]); + G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m2, m3); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, m[6u], m[7u], 0u, 0u); + G(&v, 6u, 8u, 18u, 28u, m6, m7, 0u, 0u); // ROUND(5) - G(&v, 0u, 8u, 16u, 24u, m[4u], m[5u], 0u, 0u); + G(&v, 0u, 8u, 16u, 24u, m4, m5, 0u, 0u); G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, m[0u], m[1u], 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, 0u, 0u, m[6u], m[7u]); - G(&v, 0u, 10u, 20u, 30u, m[8u], m[9u], 0u, 0u); + G(&v, 4u, 12u, 20u, 28u, m0, m1, 0u, 0u); + G(&v, 6u, 14u, 22u, 30u, 0u, 0u, m6, m7); + G(&v, 0u, 10u, 20u, 30u, m8, m9, 0u, 0u); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, m[2u], m[3u], 0u, 0u); + G(&v, 6u, 8u, 18u, 28u, m2, m3, 0u, 0u); // ROUND(6) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m[2u], m[3u], 0u, 0u); + G(&v, 2u, 10u, 18u, 26u, m2, m3, 0u, 0u); G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, m[8u], m[9u], 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m[0u], m[1u], 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, m[6u], m[7u]); - G(&v, 4u, 14u, 16u, 26u, 0u, 0u, m[4u], m[5u]); + G(&v, 6u, 14u, 22u, 30u, m8, m9, 0u, 0u); + G(&v, 0u, 10u, 20u, 30u, m0, m1, 0u, 0u); + G(&v, 2u, 12u, 22u, 24u, 0u, 0u, m6, m7); + G(&v, 4u, 14u, 16u, 26u, 0u, 0u, m4, m5); G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(7) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m[2u], m[3u]); - G(&v, 6u, 14u, 22u, 30u, m[6u], m[7u], 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m[0u], m[1u]); - G(&v, 2u, 12u, 22u, 24u, 0u, 0u, m[8u], m[9u]); + G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m2, m3); + G(&v, 6u, 14u, 22u, 30u, m6, m7, 0u, 0u); + G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m0, m1); + G(&v, 2u, 12u, 22u, 24u, 0u, 0u, m8, m9); G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, m[4u], m[5u], 0u, 0u); + G(&v, 6u, 8u, 18u, 28u, m4, m5, 0u, 0u); // ROUND(8) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); G(&v, 2u, 10u, 18u, 26u, 0u, 0u, 0u, 0u); - G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m[6u], m[7u]); - G(&v, 6u, 14u, 22u, 30u, m[0u], m[1u], 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m[4u], m[5u]); + G(&v, 4u, 12u, 20u, 28u, 0u, 0u, m6, m7); + G(&v, 6u, 14u, 22u, 30u, m0, m1, 0u, 0u); + G(&v, 0u, 10u, 20u, 30u, 0u, 0u, m4, m5); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, m[2u], m[3u], m[8u], m[9u]); + G(&v, 4u, 14u, 16u, 26u, m2, m3, m8, m9); G(&v, 6u, 8u, 18u, 28u, 0u, 0u, 0u, 0u); // ROUND(9) - G(&v, 0u, 8u, 16u, 24u, 0u, 0u, m[4u], m[5u]); - G(&v, 2u, 10u, 18u, 26u, 0u, 0u, m[8u], m[9u]); + G(&v, 0u, 8u, 16u, 24u, 0u, 0u, m4, m5); + G(&v, 2u, 10u, 18u, 26u, 0u, 0u, m8, m9); G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); - G(&v, 6u, 14u, 22u, 30u, m[2u], m[3u], 0u, 0u); + G(&v, 6u, 14u, 22u, 30u, m2, m3, 0u, 0u); G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); - G(&v, 4u, 14u, 16u, 26u, m[6u], m[7u], 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m[0u], m[1u]); + G(&v, 4u, 14u, 16u, 26u, m6, m7, 0u, 0u); + G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m0, m1); // ROUND(10) - G(&v, 0u, 8u, 16u, 24u, m[0u], m[1u], m[2u], m[3u]); - G(&v, 2u, 10u, 18u, 26u, m[4u], m[5u], m[6u], m[7u]); - G(&v, 4u, 12u, 20u, 28u, m[8u], m[9u], 0u, 0u); + G(&v, 0u, 8u, 16u, 24u, m0, m1, m2, m3); + G(&v, 2u, 10u, 18u, 26u, m4, m5, m6, m7); + G(&v, 4u, 12u, 20u, 28u, m8, m9, 0u, 0u); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); G(&v, 0u, 10u, 20u, 30u, 0u, 0u, 0u, 0u); G(&v, 2u, 12u, 22u, 24u, 0u, 0u, 0u, 0u); @@ -395,21 +383,21 @@ export class PowGpu extends WorkerInterface { // ROUND(11) G(&v, 0u, 8u, 16u, 24u, 0u, 0u, 0u, 0u); - G(&v, 2u, 10u, 18u, 26u, m[8u], m[9u], 0u, 0u); + G(&v, 2u, 10u, 18u, 26u, m8, m9, 0u, 0u); G(&v, 4u, 12u, 20u, 28u, 0u, 0u, 0u, 0u); G(&v, 6u, 14u, 22u, 30u, 0u, 0u, 0u, 0u); - G(&v, 0u, 10u, 20u, 30u, m[2u], m[3u], 0u, 0u); - G(&v, 2u, 12u, 22u, 24u, m[0u], m[1u], m[4u], m[5u]); + G(&v, 0u, 10u, 20u, 30u, m2, m3, 0u, 0u); + G(&v, 2u, 12u, 22u, 24u, m0, m1, m4, m5); G(&v, 4u, 14u, 16u, 26u, 0u, 0u, 0u, 0u); - G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m[6u], m[7u]); + G(&v, 6u, 8u, 18u, 28u, 0u, 0u, m6, m7); /** * Set nonce if it passes the threshold and no other thread has set it */ if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold && atomicLoad(&work.found) == 0u) { atomicStore(&work.found, 1u); - work.nonce.x = m[0]; - work.nonce.y = m[1]; + work.nonce.x = m0; + work.nonce.y = m1; } return; } -- 2.34.1