From: Chris Duncan Date: Mon, 6 Jan 2025 00:10:39 +0000 (-0800) Subject: Start eliminating SIGMA index lookups. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=b05f3941c6922f5935a5a540e5cc48c60cc361e8;p=libnemo.git Start eliminating SIGMA index lookups. --- diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index 29f341c..87ca02f 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -70,21 +70,22 @@ export class PowGpu extends WorkerInterface { * These are offsets into the input data buffer for each mixing step. * They are multiplied by 2 from the original SIGMA values in * the C reference implementation, which refered to uint64s. + * + * const SIGMA82: array = array( + * 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, + * 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u, + * 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u, + * 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u, + * 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u, + * 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u, + * 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u, + * 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u, + * 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u, + * 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u, + * 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, + * 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u + * ); */ - const SIGMA82: array = array( - 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u, - 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u, - 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u, - 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u, - 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u, - 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u, - 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u, - 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u, - 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u, - 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u - ); /** * G Mixing function @@ -280,16 +281,19 @@ export class PowGpu extends WorkerInterface { /** * Twelve rounds of mixing as part of BLAKE2b compression step */ + // 0-15 20u,22u,24u,26u,28u,30u // ROUND(0) - G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[0u], SIGMA82[1u]); - G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[2u], SIGMA82[3u]); - G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[4u], SIGMA82[5u]); - G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[6u], SIGMA82[7u]); - G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[8u], SIGMA82[9u]); - G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[10u], SIGMA82[11u]); + G(&v, &m, 0u, 8u, 16u, 24u, 0u, 2u); + G(&v, &m, 2u, 10u, 18u, 26u, 4u, 6u); + G(&v, &m, 4u, 12u, 20u, 28u, 8u, 10u); + G(&v, &m, 6u, 14u, 22u, 30u, 12u, 14u); + G(&v, &m, 0u, 10u, 20u, 30u, 16u, 18u); + G(&v, &m, 2u, 12u, 22u, 24u, 20u, 22u); G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[12u], SIGMA82[13u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[14u], SIGMA82[15u]); + // 16-31 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u + // ROUND(1) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[16u], SIGMA82[17u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[18u], SIGMA82[19u]); @@ -300,6 +304,9 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[28u], SIGMA82[29u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[30u], SIGMA82[31u]); + + // 32-47 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u + // ROUND(2) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[32u], SIGMA82[33u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[34u], SIGMA82[35u]); @@ -310,6 +317,11 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[44u], SIGMA82[45u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[46u], SIGMA82[47u]); + + + + // 48-63 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u + // ROUND(3) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[48u], SIGMA82[49u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[50u], SIGMA82[51u]); @@ -320,6 +332,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[60u], SIGMA82[61u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[62u], SIGMA82[63u]); + + + // 64-79 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u + // ROUND(4) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[64u], SIGMA82[65u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[66u], SIGMA82[67u]); @@ -330,6 +346,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[76u], SIGMA82[77u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[78u], SIGMA82[79u]); + + + // 80-95 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u + // ROUND(5) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[80u], SIGMA82[81u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[82u], SIGMA82[83u]); @@ -340,6 +360,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[92u], SIGMA82[93u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[94u], SIGMA82[95u]); + + + // 96-111 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u + // ROUND(6) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[96u], SIGMA82[97u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[98u], SIGMA82[99u]); @@ -350,6 +374,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[108u], SIGMA82[109u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[110u], SIGMA82[111u]); + + + // 112-127 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u + // ROUND(7) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[112u], SIGMA82[113u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[114u], SIGMA82[115u]); @@ -360,6 +388,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[124u], SIGMA82[125u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[126u], SIGMA82[127u]); + + + // 128-143 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u + // ROUND(8) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[128u], SIGMA82[129u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[130u], SIGMA82[131u]); @@ -370,6 +402,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[140u], SIGMA82[141u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[142u], SIGMA82[143u]); + + + // 144-159 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u + // ROUND(9) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[144u], SIGMA82[145u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[146u], SIGMA82[147u]); @@ -380,6 +416,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[156u], SIGMA82[157u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[158u], SIGMA82[159u]); + + + // 160-175 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u + // ROUND(10) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[160u], SIGMA82[161u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[162u], SIGMA82[163u]); @@ -390,6 +430,10 @@ export class PowGpu extends WorkerInterface { G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[172u], SIGMA82[173u]); G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[174u], SIGMA82[175u]); + + + // 176-191 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u + // ROUND(11) G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[176u], SIGMA82[177u]); G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[178u], SIGMA82[179u]);