From 28daabfaa4bd742ff62ded0039db333d6253002a Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 7 Jan 2025 21:01:07 -0800 Subject: [PATCH] Offload shader code to separate file. --- perf/block.perf.js | 2 +- src/lib/block.ts | 4 +- src/lib/workers.ts | 4 +- src/lib/workers/nano-pow-shaders.ts | 743 ++++++++++++++++++++++++++++ src/lib/workers/nano-pow.ts | 217 ++++++++ src/lib/workers/pow.wgsl | 178 ------- src/lib/workers/powgl.ts | 230 +-------- src/lib/workers/powgpu.ts | 566 --------------------- src/main.ts | 4 +- 9 files changed, 972 insertions(+), 976 deletions(-) create mode 100644 src/lib/workers/nano-pow-shaders.ts create mode 100644 src/lib/workers/nano-pow.ts delete mode 100644 src/lib/workers/pow.wgsl delete mode 100644 src/lib/workers/powgpu.ts diff --git a/perf/block.perf.js b/perf/block.perf.js index 66910aa..3915a9a 100644 --- a/perf/block.perf.js +++ b/perf/block.perf.js @@ -89,7 +89,7 @@ await suite('Block performance', async () => { console.log(`Maximum: ${max} ms`) }) - await test(`nano-webgl-pow: Time to calculate proof-of-work for a send block ${COUNT} times`, async () => { + await skip(`nano-webgl-pow: Time to calculate proof-of-work for a send block ${COUNT} times`, async () => { //@ts-expect-error window.NanoWebglPow.width = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency)) //@ts-expect-error diff --git a/src/lib/block.ts b/src/lib/block.ts index 11b969f..971c80f 100644 --- a/src/lib/block.ts +++ b/src/lib/block.ts @@ -8,7 +8,7 @@ import { dec, hex } from './convert.js' import { NanoNaCl } from './workers/nano-nacl.js' import { Pool } from './pool.js' import { Rpc } from './rpc.js' -import { PowGl, PowGpu } from './workers.js' +import { PowGl, NanoPowGpu } from './workers.js' /** * Represents a block as defined by the Nano cryptocurrency protocol. The Block @@ -16,7 +16,7 @@ import { PowGl, PowGpu } from './workers.js' * of three derived classes: SendBlock, ReceiveBlock, ChangeBlock. 
 */
 abstract class Block {
-	static #pool: Pool = new Pool(PowGpu)
+	static #pool: Pool = new Pool(NanoPowGpu)
 	account: Account
 	type: string = 'state'
 	abstract subtype: 'send' | 'receive' | 'change'
diff --git a/src/lib/workers.ts b/src/lib/workers.ts
index 7271925..327919e 100644
--- a/src/lib/workers.ts
+++ b/src/lib/workers.ts
@@ -3,6 +3,6 @@
 import { default as Bip44Ckd } from './workers/bip44-ckd.js'
 import { default as NanoNaCl } from './workers/nano-nacl.js'
 import { default as PowGl } from './workers/powgl.js'
-import { default as PowGpu } from './workers/powgpu.js'
+import { default as NanoPowGpu } from './workers/nano-pow.js'
 
-export { Bip44Ckd, NanoNaCl, PowGl, PowGpu }
+export { Bip44Ckd, NanoNaCl, PowGl, NanoPowGpu }
diff --git a/src/lib/workers/nano-pow-shaders.ts b/src/lib/workers/nano-pow-shaders.ts
new file mode 100644
index 0000000..8dc7dcb
--- /dev/null
+++ b/src/lib/workers/nano-pow-shaders.ts
@@ -0,0 +1,743 @@
+export const NanoPowGlVertexShader = `#version 300 es
+#pragma vscode_glsllint_stage: vert
+precision highp float;
+layout (location=0) in vec4 position;
+layout (location=1) in vec2 uv;
+
+out vec2 uv_pos;
+
+void main() {
+	uv_pos = uv;
+	gl_Position = position;
+}
+`
+
+export const NanoPowGlFragmentShader = `#version 300 es
+#pragma vscode_glsllint_stage: frag
+precision highp float;
+precision highp int;
+
+in vec2 uv_pos;
+out vec4 fragColor;
+
+// blockhash - array of precalculated block hash components
+// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else
+// workload - Defines canvas size
+layout(std140) uniform UBO {
+	uint blockhash[8];
+	uint threshold;
+	float workload;
+};
+
+// Random work values
+// First 2 bytes will be overwritten by texture pixel position
+// Second 2 bytes will be modified if the canvas size is greater than 256x256
+// Last 4 bytes remain as generated externally
+layout(std140) uniform WORK {
+	uvec4 work[2];
+};
+
+// Defined separately from uint v[32] below as the original value is required
+// to calculate the second uint32 of the digest for threshold comparison
+const uint BLAKE2B_IV32_1 = 0x6A09E667u;
+
+// Both buffers represent 16 uint64s as 32 uint32s
+// because that's what GLSL offers, just like JavaScript
+
+// Compression buffer, initialized to 2 instances of the initialization vector
+// The following values have been modified from the BLAKE2B_IV:
+// OUTLEN is constant 8 bytes
+// v[0] ^= 0x01010000u ^ uint(OUTLEN);
+// INLEN is constant 40 bytes: work value (8) + block hash (32)
+// v[24] ^= uint(INLEN);
+// It's always the "last" compression at this INLEN
+// v[28] = ~v[28];
+// v[29] = ~v[29];
+uint v[32] = uint[32](
+	0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+	0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+	0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+	0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+	0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+	0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+	0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+	0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+);
+// Input data buffer
+uint m[32];
+
+// These are offsets into the input data buffer for each mixing step.
+// They are multiplied by 2 from the original SIGMA values in
+// the C reference implementation, which referred to uint64s.
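+// For example, BLAKE2b sigma row 0 (0,1,2,...,15) becomes row 0 below
+// (0,2,4,...,30), so that G can read m[SIGMA82[k]] and m[SIGMA82[k]+1u] as
+// the low and high uint32 halves of 64-bit message word sigma[k].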
+const uint SIGMA82[192] = uint[192](
+	0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+	28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+	22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+	14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+	18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+	4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+	24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+	26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+	12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+	20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+	0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+	28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+);
+
+// G mixing function
+void G (uint ix, uint iy, uint a, uint b, uint c, uint d) {
+	uint o0;
+	uint o1;
+	uint xor0;
+	uint xor1;
+
+	// a = a + b;
+	o0 = v[a] + v[b];
+	o1 = v[a+1u] + v[b+1u];
+	if (v[a] > 0xFFFFFFFFu - v[b]) {
+		o1 = o1 + 1u;
+	}
+	v[a] = o0;
+	v[a+1u] = o1;
+
+	// a = a + m[sigma[r][2*i+0]];
+	o0 = v[a] + m[ix];
+	o1 = v[a+1u] + m[ix+1u];
+	if (v[a] > 0xFFFFFFFFu - m[ix]) {
+		o1 = o1 + 1u;
+	}
+	v[a] = o0;
+	v[a+1u] = o1;
+
+	// d = rotr64(d ^ a, 32);
+	xor0 = v[d] ^ v[a];
+	xor1 = v[d+1u] ^ v[a+1u];
+	v[d] = xor1;
+	v[d+1u] = xor0;
+
+	// c = c + d;
+	o0 = v[c] + v[d];
+	o1 = v[c+1u] + v[d+1u];
+	if (v[c] > 0xFFFFFFFFu - v[d]) {
+		o1 = o1 + 1u;
+	}
+	v[c] = o0;
+	v[c+1u] = o1;
+
+	// b = rotr64(b ^ c, 24);
+	xor0 = v[b] ^ v[c];
+	xor1 = v[b+1u] ^ v[c+1u];
+	v[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+	v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
+
+	// a = a + b;
+	o0 = v[a] + v[b];
+	o1 = v[a+1u] + v[b+1u];
+	if (v[a] > 0xFFFFFFFFu - v[b]) {
+		o1 = o1 + 1u;
+	}
+	v[a] = o0;
+	v[a+1u] = o1;
+
+	// a = a + m[sigma[r][2*i+1]];
+	o0 = v[a] + m[iy];
+	o1 = v[a+1u] + m[iy+1u];
+	if (v[a] > 0xFFFFFFFFu - m[iy]) {
+		o1 = o1 + 1u;
+	}
+	v[a] = o0;
+	v[a+1u] = o1;
+
+	// d = rotr64(d ^ a, 16)
+	xor0 = v[d] ^ v[a];
+	xor1 = v[d+1u] ^ v[a+1u];
+	v[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+	v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
+
+	// c = c + d;
+	o0 = v[c] + v[d];
+	o1 = v[c+1u] + v[d+1u];
+	if (v[c] > 0xFFFFFFFFu - v[d]) {
+		o1 = o1 + 1u;
+	}
+	v[c] = o0;
+	v[c+1u] = o1;
+
+	// b = rotr64(b ^ c, 63)
+	xor0 = v[b] ^ v[c];
+	xor1 = v[b+1u] ^ v[c+1u];
+	v[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+	v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+void main() {
+	int i;
+	uvec4 u_work0 = work[0u];
+	uvec4 u_work1 = work[1u];
+	uint uv_x = uint(uv_pos.x * workload);
+	uint uv_y = uint(uv_pos.y * workload);
+	uint x_pos = uv_x % 256u;
+	uint y_pos = uv_y % 256u;
+	uint x_index = (uv_x - x_pos) / 256u;
+	uint y_index = (uv_y - y_pos) / 256u;
+
+	// First 2 work bytes are the x,y pos within the 256x256 area, the next
+	// two bytes are modified from the random generated value, XOR'd with
+	// the x,y area index of where this pixel is located
+	m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u));
+
+	// Remaining bytes are unmodified from the randomly generated value
+	m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u));
+
+	// Block hash
+	for (uint i = 0u; i < 8u; i = i + 1u) {
+		m[i+2u] = blockhash[i];
+	}
+
+	// Twelve rounds of mixing
+	for (uint i = 0u; i < 12u; i = i + 1u) {
+		G(SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u], 0u, 8u, 16u, 24u);
+		G(SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u], 2u, 10u, 18u, 26u);
+		G(SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u], 4u, 12u, 20u, 28u);
+		G(SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u], 6u, 14u, 22u, 30u);
+		G(SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u], 0u, 10u, 20u, 30u);
+		G(SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u], 2u, 12u, 22u, 24u);
+		G(SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u], 4u, 14u, 16u, 26u);
+		G(SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u], 6u, 8u, 18u, 28u);
+	}
+
+	// Pixel data is multiplied by threshold test result (0 or 1)
+	// First 4 bytes insignificant, only calculate digest of second 4 bytes
+	if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold) {
+		fragColor = vec4(
+			float(x_index + 1u)/255.0, // +1 to distinguish from 0 (unsuccessful) pixels
+			float(y_index + 1u)/255.0, // Same as previous
+			float(x_pos)/255.0, // Return the 2 custom bytes used in work value
+			float(y_pos)/255.0 // Second custom byte
+		);
+	} else {
+		discard;
+	}
+}
+`
+
+export const NanoPowGpuComputeShader = `
+struct UBO {
+	blockhash: array<vec4<u32>, 2>,
+	random: u32,
+	threshold: u32
+};
+@group(0) @binding(0) var<uniform> ubo: UBO;
+
+struct WORK {
+	nonce: vec2<u32>,
+	found: atomic<u32>
+};
+@group(0) @binding(1) var<storage, read_write> work: WORK;
+
+/**
+* Defined separately from uint v[32] below as the original value is required
+* to calculate the second uint32 of the digest for threshold comparison
+*/
+const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+
+/**
+* These are offsets into the input data buffer for each mixing step.
+* They are multiplied by 2 from the original SIGMA values in
+* the C reference implementation, which referred to uint64s.
+*
+* const SIGMA82: array<u32, 192> = array<u32, 192>(
+*	0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+*	28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+*	22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+*	14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+*	18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+*	4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+*	24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+*	26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+*	12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+*	20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+*	0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+*	28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+* );
+*/
+
+/**
+* G Mixing function
+*/
+fn G (
+	va0: ptr<function, u32>, va1: ptr<function, u32>,
+	vb0: ptr<function, u32>, vb1: ptr<function, u32>,
+	vc0: ptr<function, u32>, vc1: ptr<function, u32>,
+	vd0: ptr<function, u32>, vd1: ptr<function, u32>,
+	mx0: u32, mx1: u32, my0: u32, my1: u32
+) {
+	var o0: u32;
+	var o1: u32;
+	var xor0: u32;
+	var xor1: u32;
+
+	// a = a + b;
+	o0 = *va0 + *vb0;
+	o1 = *va1 + *vb1;
+	if (*va0 > 0xFFFFFFFFu - *vb0) {
+		o1 = o1 + 1u;
+	}
+	*va0 = o0;
+	*va1 = o1;
+
+	// a = a + m[sigma[r][2*i+0]];
+	o0 = *va0 + mx0;
+	o1 = *va1 + mx1;
+	if (*va0 > 0xFFFFFFFFu - mx0) {
+		o1 = o1 + 1u;
+	}
+	*va0 = o0;
+	*va1 = o1;
+
+	// d = rotr64(d ^ a, 32);
+	xor0 = *vd0 ^ *va0;
+	xor1 = *vd1 ^ *va1;
+	*vd0 = xor1;
+	*vd1 = xor0;
+
+	// c = c + d;
+	o0 = *vc0 + *vd0;
+	o1 = *vc1 + *vd1;
+	if (*vc0 > 0xFFFFFFFFu - *vd0) {
+		o1 = o1 + 1u;
+	}
+	*vc0 = o0;
+	*vc1 = o1;
+
+	// b = rotr64(b ^ c, 24);
+	xor0 = *vb0 ^ *vc0;
+	xor1 = *vb1 ^ *vc1;
+	*vb0 = (xor0 >> 24u) ^ (xor1 << 8u);
+	*vb1 = (xor1 >> 24u) ^ (xor0 << 8u);
+
+	// a = a + b;
+	o0 = *va0 + *vb0;
+	o1 = *va1 + *vb1;
+	if (*va0 > 0xFFFFFFFFu - *vb0) {
+		o1 = o1 + 1u;
+	}
+	*va0 = o0;
+	*va1 = o1;
+
+	// a = a + m[sigma[r][2*i+1]];
+	o0 = *va0 + my0;
+	o1 = *va1 + my1;
+	if (*va0 > 0xFFFFFFFFu - my0) {
+		o1 = o1 + 1u;
+	}
+	*va0 = o0;
+	*va1 = o1;
+
+	// d = rotr64(d ^ a, 16)
+	xor0 = *vd0 ^ *va0;
+	xor1 = *vd1 ^ *va1;
+	*vd0 = (xor0 >> 16u) ^ (xor1 << 16u);
+	*vd1 = (xor1 >> 16u) ^ (xor0 << 16u);
+
+	// c = c + d;
+	o0 = *vc0 + *vd0;
+	o1 = *vc1 + *vd1;
+	if (*vc0 > 0xFFFFFFFFu - *vd0) {
+		o1 = o1 + 1u;
+	}
+	*vc0 = o0;
+	*vc1 = o1;
+
+	// b = rotr64(b ^ c, 63)
+	xor0 = *vb0 ^ *vc0;
+	xor1 = *vb1 ^ *vc1;
+	*vb0 = (xor1 >> 31u) ^ (xor0 << 1u);
+	*vb1 = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+/**
+* Main compute function
+* 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
+* UBO. High 4 bytes are the random value XOR'd with index of each thread.
+*/
+@compute @workgroup_size(64)
+fn main(
+	@builtin(workgroup_id) workgroup_id: vec3<u32>,
+	@builtin(local_invocation_id) local_id: vec3<u32>
+) {
+	if (atomicLoad(&work.found) != 0u) { return; }
+
+	let threshold: u32 = ubo.threshold;
+
+	/**
+	* Flatten 3D workgroup and local identifiers into u32 for each thread
+	*/
+	var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
+		((workgroup_id.y & 0xFFu) << 16u) |
+		((workgroup_id.z & 0xFFu) << 8u) |
+		(local_id.x & 0xFFu);
+
+	/**
+	* Initialize (nonce||blockhash) concatenation
+	*/
+	var m0: u32 = ubo.random;
+	var m1: u32 = ubo.random ^ id;
+	var m2: u32 = ubo.blockhash[0u].x;
+	var m3: u32 = ubo.blockhash[0u].y;
+	var m4: u32 = ubo.blockhash[0u].z;
+	var m5: u32 = ubo.blockhash[0u].w;
+	var m6: u32 = ubo.blockhash[1u].x;
+	var m7: u32 = ubo.blockhash[1u].y;
+	var m8: u32 = ubo.blockhash[1u].z;
+	var m9: u32 = ubo.blockhash[1u].w;
+
+	/**
+	* Compression buffer initialized to 2 instances of initialization vector
+	* The following values have been modified from the BLAKE2B_IV:
+	* OUTLEN is constant 8 bytes
+	* v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+	* INLEN is constant 40 bytes: work value (8) + block hash (32)
+	* v[24u] ^= uint(INLEN);
+	* It is always the "last" compression at this INLEN
+	* v[28u] = ~v[28u];
+	* v[29u] = ~v[29u];
+	*/
+	var v0: u32 = 0xF2BDC900u;
+	var v1: u32 = 0x6A09E667u;
+	var v2: u32 = 0x84CAA73Bu;
+	var v3: u32 = 0xBB67AE85u;
+	var v4: u32 = 0xFE94F82Bu;
+	var v5: u32 = 0x3C6EF372u;
+	var v6: u32 = 0x5F1D36F1u;
+	var v7: u32 = 0xA54FF53Au;
+	var v8: u32 = 0xADE682D1u;
+	var v9: u32 = 0x510E527Fu;
+	var v10: u32 = 0x2B3E6C1Fu;
+	var v11: u32 = 0x9B05688Cu;
+	var v12: u32 = 0xFB41BD6Bu;
+	var v13: u32 = 0x1F83D9ABu;
+	var v14: u32 = 0x137E2179u;
+	var v15: u32 = 0x5BE0CD19u;
+	var v16: u32 = 0xF3BCC908u;
+	var v17: u32 = 0x6A09E667u;
+	var v18: u32 = 0x84CAA73Bu;
+	var v19: u32 = 0xBB67AE85u;
+	var v20: u32 = 0xFE94F82Bu;
+	var v21: u32 = 0x3C6EF372u;
+	var v22: u32 = 0x5F1D36F1u;
+	var v23: u32 = 0xA54FF53Au;
+	var v24: u32 = 0xADE682F9u;
+	var v25: u32 = 0x510E527Fu;
+	var v26: u32 = 0x2B3E6C1Fu;
+	var v27: u32 = 0x9B05688Cu;
+	var v28: u32 = 0x04BE4294u;
+	var v29: u32 = 0xE07C2654u;
+	var v30: u32 = 0x137E2179u;
+	var v31: u32 = 0x5BE0CD19u;
+
+	/**
+	* Twelve rounds of mixing as part of BLAKE2b compression step
+	*/
+	// ROUND(0)
+	G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
+	G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
+	G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
+	G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+	G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+	G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+	G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+	G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+	// ROUND(1)
+	G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 
0u, 0u, 0u, 0u); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u); + G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5); + G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u); + G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7); + + // ROUND(2) + G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m0, m1); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m4, m5); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u); + G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m6, m7, 0u, 0u); + G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m2, m3); + G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m8, m9); + + // ROUND(3) + G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m6, m7, m2, m3); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m4, m5, 0u, 0u); + G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m8, m9, m0, m1); + G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u); + + // ROUND(4) + G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m0, m1); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m4, m5, m8, m9); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m2, m3); + G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u); + G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m6, m7, 0u, 0u); + + // ROUND(5) + G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m4, m5, 0u, 0u); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m0, m1, 0u, 0u); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, m6, m7); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m8, m9, 0u, 0u); + G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u); + G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m2, m3, 0u, 0u); + + // ROUND(6) + G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m2, m3, 0u, 0u); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m8, m9, 0u, 0u); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m0, m1, 0u, 0u); + G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m6, m7); + G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m4, m5); + G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u); + + // ROUND(7) + G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u); + G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u); + G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m2, m3); + G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m6, m7, 0u, 0u); + G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, 
&v31, 0u, 0u, m0, m1);
+	G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m8, m9);
+	G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+	G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m4, m5, 0u, 0u);
+
+	// ROUND(8)
+	G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+	G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
+	G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m6, m7);
+	G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m0, m1, 0u, 0u);
+	G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m4, m5);
+	G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+	G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m2, m3, m8, m9);
+	G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+	// ROUND(9)
+	G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m4, m5);
+	G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m8, m9);
+	G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+	G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m2, m3, 0u, 0u);
+	G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+	G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+	G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m6, m7, 0u, 0u);
+	G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m0, m1);
+
+	// ROUND(10)
+	G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
+	G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
+	G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
+	G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+	G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+	G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+	G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+	G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+	// ROUND(11)
+	G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+	G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
+	G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+	G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+	G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
+	G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
+	G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+	G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
+
+	/**
+	* Set nonce if it passes the threshold and no other thread has set it
+	*/
+	if ((BLAKE2B_IV32_1 ^ v1 ^ v17) > threshold && atomicLoad(&work.found) == 0u) {
+		atomicStore(&work.found, 1u);
+		work.nonce.x = m0;
+		work.nonce.y = m1;
+	}
+	return;
+}
+`
+
+
+
+/**
+* The original NanoPow compute shader derived from nano-webgl-pow is saved in
+* this comment for reference purposes. It is not quite as performant as the
+* inlined version exported above.
+*/
+
+/*
+struct UBO {
+	blockhash: array<vec4<u32>, 2>,
+	random: u32,
+	threshold: u32
+};
+@group(0) @binding(0) var<uniform> ubo: UBO;
+
+struct WORK {
+	nonce: vec2<u32>,
+	found: atomic<u32>
+};
+@group(0) @binding(1) var<storage, read_write> work: WORK;
+
+// Defined separately from uint v[32] below as the original value is required
+// to calculate the second uint32 of the digest for threshold comparison
+const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+
+// These are offsets into the input data buffer for each mixing step.
+// They are multiplied by 2 from the original SIGMA values in
+// the C reference implementation, which referred to uint64s.
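+// For example, in round 1 the second G call reads SIGMA82[18] = 8 and
+// SIGMA82[19] = 16, i.e. message words m[8],m[9] and m[16],m[17]; the
+// unrolled shader above bakes those lookups in as literal arguments
+// (m8, m9, 0u, 0u), since only m[0] through m[9] are ever set.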
+
+const SIGMA82: array<u32, 192> = array<u32, 192>(
+	0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+	28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+	22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+	14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+	18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+	4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+	24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+	26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+	12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+	20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+	0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+	28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+);
+
+// 64-bit unsigned addition within the compression buffer
+// Sets v[i,i+1] += b
+// LSb is the Least-Significant (32) Bits of b
+// MSb is the Most-Significant (32) Bits of b
+// If LSb overflows, increment MSb operand
+fn add_uint64 (v: ptr<function, array<u32, 32>>, i: u32, LSb: u32, MSb: u32) {
+	var o0: u32 = (*v)[i] + LSb;
+	var o1: u32 = (*v)[i+1u] + MSb;
+	if ((*v)[i] > 0xFFFFFFFFu - LSb) {
+		o1 = o1 + 1u;
+	}
+	(*v)[i] = o0;
+	(*v)[i+1u] = o1;
+}
+
+// G Mixing function
+fn G (v: ptr<function, array<u32, 32>>, m: ptr<function, array<u32, 32>>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) {
+	add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+	add_uint64(v, a, (*m)[ix], (*m)[ix+1u]);
+
+	// v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
+	var xor0: u32 = (*v)[d] ^ (*v)[a];
+	var xor1: u32 = (*v)[d+1u] ^ (*v)[a+1u];
+	(*v)[d] = xor1;
+	(*v)[d+1u] = xor0;
+
+	add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+	// v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
+	xor0 = (*v)[b] ^ (*v)[c];
+	xor1 = (*v)[b+1u] ^ (*v)[c+1u];
+	(*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+	(*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
+
+	add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+	add_uint64(v, a, (*m)[iy], (*m)[iy+1u]);
+
+	// v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
+	xor0 = (*v)[d] ^ (*v)[a];
+	xor1 = (*v)[d+1u] ^ (*v)[a+1u];
+	(*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+	(*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
+
+	add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+	// v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
+	xor0 = (*v)[b] ^ (*v)[c];
+	xor1 = (*v)[b+1u] ^ (*v)[c+1u];
+	(*v)[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+	(*v)[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+// Main compute function
+// 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
+// UBO. High 4 bytes are the random value XOR'd with index of each thread.
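+// For scale: the caller dispatches dispatchWorkgroups(256, 256, 256), so a
+// workgroup size of 64 yields 64 * 256^3 = 2^30 candidate nonces per pass,
+// one per flattened thread id.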
+@compute @workgroup_size(${this.workload})
+fn main(
+	@builtin(workgroup_id) workgroup_id: vec3<u32>,
+	@builtin(local_invocation_id) local_id: vec3<u32>
+) {
+	if (atomicLoad(&work.found) != 0u) { return; }
+
+	let threshold: u32 = ubo.threshold;
+
+	// Flatten 3D workgroup and local identifiers into u32 for each thread
+	var id: u32 = ((workgroup_id.x & 0xff) << 24) |
+		((workgroup_id.y & 0xff) << 16) |
+		((workgroup_id.z & 0xff) << 8) |
+		(local_id.x & 0xff);
+
+	// Initialize (nonce||blockhash) concatenation
+	var m: array<u32, 32>;
+	m[0u] = ubo.random;
+	m[1u] = ubo.random ^ id;
+	m[2u] = ubo.blockhash[0u].x;
+	m[3u] = ubo.blockhash[0u].y;
+	m[4u] = ubo.blockhash[0u].z;
+	m[5u] = ubo.blockhash[0u].w;
+	m[6u] = ubo.blockhash[1u].x;
+	m[7u] = ubo.blockhash[1u].y;
+	m[8u] = ubo.blockhash[1u].z;
+	m[9u] = ubo.blockhash[1u].w;
+
+
+	// Compression buffer initialized to 2 instances of initialization vector
+	// The following values have been modified from the BLAKE2B_IV:
+	// OUTLEN is constant 8 bytes
+	// v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+	// INLEN is constant 40 bytes: work value (8) + block hash (32)
+	// v[24u] ^= uint(INLEN);
+	// It is always the "last" compression at this INLEN
+	// v[28u] = ~v[28u];
+	// v[29u] = ~v[29u];
+	var v = array<u32, 32>(
+		0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+		0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+		0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+		0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+		0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+		0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+		0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+		0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+	);
+
+	// Twelve rounds of mixing as part of BLAKE2b compression step
+	for (var r: u32 = 0u; r < 12u; r = r + 1u) {
+		G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]);
+		G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]);
+		G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]);
+		G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]);
+		G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]);
+		G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]);
+		G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]);
+		G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]);
+	}
+
+	// Set nonce if it passes the threshold and no other thread has set it
+	if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold && atomicLoad(&work.found) == 0u) {
+		atomicStore(&work.found, 1u);
+		work.nonce.x = m[0];
+		work.nonce.y = m[1];
+	}
+	return;
+}
+*/
diff --git a/src/lib/workers/nano-pow.ts b/src/lib/workers/nano-pow.ts
new file mode 100644
index 0000000..d27169e
--- /dev/null
+++ b/src/lib/workers/nano-pow.ts
@@ -0,0 +1,217 @@
+// SPDX-FileCopyrightText: 2024 Chris Duncan
+// SPDX-License-Identifier: GPL-3.0-or-later
+// BLAKE2b hashing implementation derived from nano-webgl-pow by Ben Green (https://github.com/numtel/nano-webgl-pow)
+/// <reference types="@webgpu/types" />
+import { WorkerInterface } from '../pool.js'
+import { NanoPowGpuComputeShader } from './nano-pow-shaders.js'
+
+/**
+* Nano proof-of-work using WebGPU.
+*/
+export class NanoPowGpu extends WorkerInterface {
+	static {
+		NanoPowGpu.listen()
+	}
+
+	/**
+	 * Calculates proof-of-work as described by the Nano cryptocurrency protocol.
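+	 * A minimal usage sketch (the hash below is a placeholder, and the static
+	 * method is called directly rather than through the worker Pool):
+	 * @example
+	 * const data = [{ hash: '0'.repeat(64), threshold: 0xfffffff8 }]
+	 * await NanoPowGpu.work(data) // attaches a 16-hex-character `work` nonce to each element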
+	 *
+	 * @param {any[]} data - Array of hashes and minimum thresholds
+	 * @returns Promise for proof-of-work attached to original array objects
+	 */
+	static async work (data: any[]): Promise<any[]> {
+		return new Promise(async (resolve, reject): Promise<void> => {
+			for (const d of data) {
+				try {
+					d.work = await this.search(d.hash, d.threshold)
+				} catch (err) {
+					reject(err)
+				}
+			}
+			resolve(data)
+		})
+	}
+
+	// Initialize WebGPU
+	static #device: GPUDevice | null = null
+	static #uboBuffer: GPUBuffer
+	static #gpuBuffer: GPUBuffer
+	static #cpuBuffer: GPUBuffer
+	static #bindGroupLayout: GPUBindGroupLayout
+	static #pipeline: GPUComputePipeline
+
+	static {
+		this.init()
+	}
+
+	// Initialize WebGPU
+	static init () {
+		// Request device and adapter
+		if (navigator.gpu == null) {
+			throw new Error('WebGPU is not supported in this browser.')
+		}
+		navigator.gpu.requestAdapter()
+			.then(adapter => {
+				if (adapter == null) {
+					throw new Error('WebGPU adapter refused by browser.')
+				}
+				adapter.requestDevice()
+					.then(device => {
+						this.#device = device
+						this.#device.lost.then(loss => {
+							console.warn(`(${loss.reason}) ${loss.message}. Reinitializing device.`)
+							this.init()
+						})
+
+						// Create buffers for writing GPU calculations and reading from JavaScript
+						this.#uboBuffer = this.#device.createBuffer({
+							size: 48,
+							usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
+						})
+						this.#gpuBuffer = this.#device.createBuffer({
+							size: 16,
+							usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
+						})
+						this.#cpuBuffer = this.#device.createBuffer({
+							size: 16,
+							usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
+						})
+
+						// Create binding group data structure and use it later once UBO is known
+						this.#bindGroupLayout = this.#device.createBindGroupLayout({
+							entries: [
+								{
+									binding: 0,
+									visibility: GPUShaderStage.COMPUTE,
+									buffer: {
+										type: 'uniform'
+									},
+								},
+								{
+									binding: 1,
+									visibility: GPUShaderStage.COMPUTE,
+									buffer: {
+										type: 'storage'
+									},
+								}
+							]
+						})
+
+						// Create pipeline to connect compute shader to binding layout
+						this.#pipeline = this.#device.createComputePipeline({
+							layout: this.#device.createPipelineLayout({
+								bindGroupLayouts: [this.#bindGroupLayout]
+							}),
+							compute: {
+								entryPoint: 'main',
+								module: this.#device.createShaderModule({
+									code: NanoPowGpuComputeShader
+								})
+							}
+						})
+					})
+			})
+			.catch(err => { throw new Error(err.message) })
+	}
+
+	/**
+	 * Finds a nonce that satisfies the Nano proof-of-work requirements.
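+	 * A minimal usage sketch (placeholder hash; callers normally pass a block
+	 * hash or public key):
+	 * @example
+	 * const work = await NanoPowGpu.search('0'.repeat(64))
+	 * console.log(work) // 16-character hexadecimal nonce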
+	 *
+	 * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
+	 * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
+	 */
+	static async search (hash: string, threshold: number = 0xfffffff8): Promise<string> {
+		if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new TypeError(`Invalid hash ${hash}`)
+		if (typeof threshold !== 'number') throw new TypeError(`Invalid threshold ${threshold}`)
+
+		// Ensure WebGPU is initialized before calculating, up to a max time frame
+		while (this.#device == null && performance.now() < 8000) {
+			await new Promise(resolve => {
+				setTimeout(resolve, 500)
+			})
+		}
+		if (this.#device == null) throw new Error(`WebGPU device failed to load.`)
+
+		// Set up uniform buffer object
+		// Note: u32 size is 4, but total alignment must be multiple of 16
+		const uboView = new DataView(new ArrayBuffer(48))
+		for (let i = 0; i < 64; i += 8) {
+			const uint32 = hash.slice(i, i + 8)
+			uboView.setUint32(i / 2, parseInt(uint32, 16))
+		}
+		const random = Math.floor((Math.random() * 0xffffffff))
+		uboView.setUint32(32, random, true)
+		uboView.setUint32(36, threshold, true)
+		this.#device.queue.writeBuffer(this.#uboBuffer, 0, uboView)
+
+		// Reset `found` flag to 0u in WORK before each calculation
+		this.#device.queue.writeBuffer(this.#gpuBuffer, 8, new Uint32Array([0]))
+
+		// Bind UBO read and GPU write buffers
+		const bindGroup = this.#device.createBindGroup({
+			layout: this.#bindGroupLayout,
+			entries: [
+				{
+					binding: 0,
+					resource: {
+						buffer: this.#uboBuffer
+					},
+				},
+				{
+					binding: 1,
+					resource: {
+						buffer: this.#gpuBuffer
+					},
+				},
+			],
+		})
+
+		// Create command encoder to issue commands to GPU and initiate computation
+		const commandEncoder = this.#device.createCommandEncoder()
+		const passEncoder = commandEncoder.beginComputePass()
+
+		// Issue commands and end compute pass structure
+		passEncoder.setPipeline(this.#pipeline)
+		passEncoder.setBindGroup(0, bindGroup)
+		passEncoder.dispatchWorkgroups(256, 256, 256)
+		passEncoder.end()
+
+		// Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
+		commandEncoder.copyBufferToBuffer(
+			this.#gpuBuffer,
+			0,
+			this.#cpuBuffer,
+			0,
+			12
+		)
+
+		// End computation by passing array of command buffers to command queue for execution
+		this.#device.queue.submit([commandEncoder.finish()])
+
+		// Read results back to JavaScript and then unmap buffer after reading
+		try {
+			await this.#cpuBuffer.mapAsync(GPUMapMode.READ)
+			await this.#device.queue.onSubmittedWorkDone()
+		} catch (err) {
+			console.warn(`Error getting data from GPU, retrying. 
${err}`) + return await this.search(hash, threshold) + } + const data = new DataView(this.#cpuBuffer.getMappedRange()) + const nonce = data.getBigUint64(0, true) + const found = !!data.getUint32(8) + this.#cpuBuffer.unmap() + + if (found) { + const hex = nonce.toString(16).padStart(16, '0') + return hex + } else { + return await this.search(hash, threshold) + } + } +} + +export default ` + const NanoPowGpuComputeShader = \`${NanoPowGpuComputeShader}\` + const WorkerInterface = ${WorkerInterface} + const NanoPowGpu = ${NanoPowGpu} +` diff --git a/src/lib/workers/pow.wgsl b/src/lib/workers/pow.wgsl deleted file mode 100644 index 2509e8e..0000000 --- a/src/lib/workers/pow.wgsl +++ /dev/null @@ -1,178 +0,0 @@ -struct UBO { - blockhash: array, 2>, - random: u32, - threshold: u32 -}; -@group(0) @binding(0) var ubo: UBO; - -struct WORK { - nonce: vec2, - found: atomic -}; -@group(0) @binding(1) var work: WORK; - -/** -* Defined separately from uint v[32] below as the original value is required -* to calculate the second uint32 of the digest for threshold comparison -*/ -const BLAKE2B_IV32_1: u32 = 0x6A09E667u; - -/** -* These are offsets into the input data buffer for each mixing step. -* They are multiplied by 2 from the original SIGMA values in -* the C reference implementation, which refered to uint64s. -*/ -const SIGMA82: array = array( - 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u, - 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u, - 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u, - 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u, - 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u, - 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u, - 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u, - 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u, - 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u, - 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u -); - -/** -* 64-bit unsigned addition within the compression buffer -* Sets v[i,i+1] += b -* LSb is the Least-Significant (32) Bits of b -* MSb is the Most-Significant (32) Bits of b -* If LSb overflows, increment MSb operand -*/ -fn add_uint64 (v: ptr>, i: u32, LSb: u32, MSb: u32) { - var o0: u32 = (*v)[i] + LSb; - var o1: u32 = (*v)[i+1u] + MSb; - if ((*v)[i] > 0xFFFFFFFFu - LSb) { - o1 = o1 + 1u; - } - (*v)[i] = o0; - (*v)[i+1u] = o1; -} - -/** -* G Mixing function -*/ -fn G (v: ptr>, m: ptr>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) { - add_uint64(v, a, (*v)[b], (*v)[b+1u]); - add_uint64(v, a, (*m)[ix], (*m)[ix+1u]); - - // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits - var xor0: u32 = (*v)[d] ^ (*v)[a]; - var xor1: u32 = (*v)[d+1u] ^ (*v)[a+1u]; - (*v)[d] = xor1; - (*v)[d+1u] = xor0; - - add_uint64(v, c, (*v)[d], (*v)[d+1u]); - - // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits - xor0 = (*v)[b] ^ (*v)[c]; - xor1 = (*v)[b+1u] ^ (*v)[c+1u]; - (*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u); - (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u); - - add_uint64(v, a, (*v)[b], (*v)[b+1u]); - add_uint64(v, a, (*m)[iy], (*m)[iy+1u]); - - // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits - xor0 = (*v)[d] ^ (*v)[a]; - xor1 = (*v)[d+1u] ^ (*v)[a+1u]; - (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u); - (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u); - - add_uint64(v, c, (*v)[d], (*v)[d+1u]); - - // v[b,b+1] = 
(v[b,b+1] xor v[c,c+1]) rotated right by 63 bits - xor0 = (*v)[b] ^ (*v)[c]; - xor1 = (*v)[b+1u] ^ (*v)[c+1u]; - (*v)[b] = (xor1 >> 31u) ^ (xor0 << 1u); - (*v)[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u); -} - -/** -* Main compute function -* 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from -* UBO. High 4 bytes are the random value XOR'd with index of each thread. -*/ -@compute @workgroup_size(${this.workload}) -fn main( - @builtin(workgroup_id) workgroup_id: vec3, - @builtin(local_invocation_id) local_id: vec3 -) { - if (atomicLoad(&work.found) != 0u) { return; } - - let threshold: u32 = ubo.threshold; - - /** - * Flatten 3D workgroup and local identifiers into u32 for each thread - */ - var id: u32 = ((workgroup_id.x & 0xff) << 24) | - ((workgroup_id.y & 0xff) << 16) | - ((workgroup_id.z & 0xff) << 8) | - (local_id.x & 0xff); - - /** - * Initialize (nonce||blockhash) concatenation - */ - var m: array; - m[0u] = ubo.random; - m[1u] = ubo.random ^ id; - m[2u] = ubo.blockhash[0u].x; - m[3u] = ubo.blockhash[0u].y; - m[4u] = ubo.blockhash[0u].z; - m[5u] = ubo.blockhash[0u].w; - m[6u] = ubo.blockhash[1u].x; - m[7u] = ubo.blockhash[1u].y; - m[8u] = ubo.blockhash[1u].z; - m[9u] = ubo.blockhash[1u].w; - - /** - * Compression buffer intialized to 2 instances of initialization vector - * The following values have been modified from the BLAKE2B_IV: - * OUTLEN is constant 8 bytes - * v[0u] ^= 0x01010000u ^ uint(OUTLEN); - * INLEN is constant 40 bytes: work value (8) + block hash (32) - * v[24u] ^= uint(INLEN); - * It is always the "last" compression at this INLEN - * v[28u] = ~v[28u]; - * v[29u] = ~v[29u]; - */ - var v = array( - 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, - 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, - 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, - 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u, - 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, - 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, - 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, - 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u - ); - - /** - * Twelve rounds of mixing as part of BLAKE2b compression step - */ - for (var r: u32 = 0u; r < 12u; r = r + 1u) { - G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]); - G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]); - G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]); - G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]); - G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]); - G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]); - G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]); - G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]); - } - - /** - * Set nonce if it passes the threshold and no other thread has set it - */ - if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold && atomicLoad(&work.found) == 0u) { - atomicStore(&work.found, 1u); - work.nonce.x = m[0]; - work.nonce.y = m[1]; - } - return; -} diff --git a/src/lib/workers/powgl.ts b/src/lib/workers/powgl.ts index 6eea95b..4ac0271 100644 --- a/src/lib/workers/powgl.ts +++ b/src/lib/workers/powgl.ts @@ -3,6 +3,7 @@ // Based on nano-webgl-pow by Ben Green (numtel) // https://github.com/numtel/nano-webgl-pow import { WorkerInterface } from '../pool.js' +import { NanoPowGlFragmentShader, NanoPowGlVertexShader } from './nano-pow-shaders.js' export class PowGl extends 
WorkerInterface { static { @@ -27,229 +28,6 @@ export class PowGl extends WorkerInterface { }) } - // Vertex Shader - static #vsSource = `#version 300 es -#pragma vscode_glsllint_stage: vert -precision highp float; -layout (location=0) in vec4 position; -layout (location=1) in vec2 uv; - -out vec2 uv_pos; - -void main() { - uv_pos = uv; - gl_Position = position; -}` - - // Fragment shader - static #fsSource = `#version 300 es -#pragma vscode_glsllint_stage: frag -precision highp float; -precision highp int; - -in vec2 uv_pos; -out vec4 fragColor; - -// blockhash - array of precalculated block hash components -// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else -// workload - Defines canvas size -layout(std140) uniform UBO { - uint blockhash[8]; - uint threshold; - float workload; -}; - -// Random work values -// First 2 bytes will be overwritten by texture pixel position -// Second 2 bytes will be modified if the canvas size is greater than 256x256 -// Last 4 bytes remain as generated externally -layout(std140) uniform WORK { - uvec4 work[2]; -}; - -// Defined separately from uint v[32] below as the original value is required -// to calculate the second uint32 of the digest for threshold comparison -const uint BLAKE2B_IV32_1 = 0x6A09E667u; - -// Both buffers represent 16 uint64s as 32 uint32s -// because that's what GLSL offers, just like Javascript - -// Compression buffer, intialized to 2 instances of the initialization vector -// The following values have been modified from the BLAKE2B_IV: -// OUTLEN is constant 8 bytes -// v[0] ^= 0x01010000u ^ uint(OUTLEN); -// INLEN is constant 40 bytes: work value (8) + block hash (32) -// v[24] ^= uint(INLEN); -// It's always the "last" compression at this INLEN -// v[28] = ~v[28]; -// v[29] = ~v[29]; -uint v[32] = uint[32]( - 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, - 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, - 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, - 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u, - 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, - 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, - 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, - 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u -); -// Input data buffer -uint m[32]; - -// These are offsets into the input data buffer for each mixing step. -// They are multiplied by 2 from the original SIGMA values in -// the C reference implementation, which refered to uint64s. 
-const uint SIGMA82[192] = uint[192]( - 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u, - 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u, - 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u, - 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u, - 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u, - 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u, - 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u, - 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u, - 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u, - 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u -); - -// G mixing function -void G (uint ix, uint iy, uint a, uint b, uint c, uint d) { - uint o0; - uint o1; - uint xor0; - uint xor1; - - // a = a + b; - o0 = v[a] + v[b]; - o1 = v[a+1u] + v[b+1u]; - if (v[a] > 0xFFFFFFFFu - v[b]) { - o1 = o1 + 1u; - } - v[a] = o0; - v[a+1u] = o1; - - // a = a + m[sigma[r][2*i+0]]; - o0 = v[a] + m[ix]; - o1 = v[a+1u] + m[ix+1u]; - if (v[a] > 0xFFFFFFFFu - m[ix]) { - o1 = o1 + 1u; - } - v[a] = o0; - v[a+1u] = o1; - - // d = rotr64(d ^ a, 32); - xor0 = v[d] ^ v[a]; - xor1 = v[d+1u] ^ v[a+1u]; - v[d] = xor1; - v[d+1u] = xor0; - - // c = c + d; - o0 = v[c] + v[d]; - o1 = v[c+1u] + v[d+1u]; - if (v[c] > 0xFFFFFFFFu - v[d]) { - o1 = o1 + 1u; - } - v[c] = o0; - v[c+1u] = o1; - - // b = rotr64(b ^ c, 24); - xor0 = v[b] ^ v[c]; - xor1 = v[b+1u] ^ v[c+1u]; - v[b] = (xor0 >> 24u) ^ (xor1 << 8u); - v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u); - - // a = a + b; - o0 = v[a] + v[b]; - o1 = v[a+1u] + v[b+1u]; - if (v[a] > 0xFFFFFFFFu - v[b]) { - o1 = o1 + 1u; - } - v[a] = o0; - v[a+1u] = o1; - - // a = a + m[sigma[r][2*i+1]]; - o0 = v[a] + m[iy]; - o1 = v[a+1u] + m[iy+1u]; - if (v[a] > 0xFFFFFFFFu - m[iy]) { - o1 = o1 + 1u; - } - v[a] = o0; - v[a+1u] = o1; - - // d = rotr64(d ^ a, 16) - xor0 = v[d] ^ v[a]; - xor1 = v[d+1u] ^ v[a+1u]; - v[d] = (xor0 >> 16u) ^ (xor1 << 16u); - v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u); - - // c = c + d; - o0 = v[c] + v[d]; - o1 = v[c+1u] + v[d+1u]; - if (v[c] > 0xFFFFFFFFu - v[d]) { - o1 = o1 + 1u; - } - v[c] = o0; - v[c+1u] = o1; - - // b = rotr64(b ^ c, 63) - xor0 = v[b] ^ v[c]; - xor1 = v[b+1u] ^ v[c+1u]; - v[b] = (xor1 >> 31u) ^ (xor0 << 1u); - v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u); -} - -void main() { - int i; - uvec4 u_work0 = work[0u]; - uvec4 u_work1 = work[1u]; - uint uv_x = uint(uv_pos.x * workload); - uint uv_y = uint(uv_pos.y * workload); - uint x_pos = uv_x % 256u; - uint y_pos = uv_y % 256u; - uint x_index = (uv_x - x_pos) / 256u; - uint y_index = (uv_y - y_pos) / 256u; - - // First 2 work bytes are the x,y pos within the 256x256 area, the next - // two bytes are modified from the random generated value, XOR'd with - // the x,y area index of where this pixel is located - m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u)); - - // Remaining bytes are un-modified from the random generated value - m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u)); - - // Block hash - for (uint i = 0u; i < 8u; i = i + 1u) { - m[i+2u] = blockhash[i]; - } - - // twelve rounds of mixing - for(uint i = 0u; i < 12u; i = i + 1u) { - G(SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u], 0u, 8u, 16u, 24u); - G(SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u], 2u, 10u, 18u, 26u); - G(SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 
5u], 4u, 12u, 20u, 28u); - G(SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u], 6u, 14u, 22u, 30u); - G(SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u], 0u, 10u, 20u, 30u); - G(SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u], 2u, 12u, 22u, 24u); - G(SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u], 4u, 14u, 16u, 26u); - G(SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u], 6u, 8u, 18u, 28u); - } - - // Pixel data is multipled by threshold test result (0 or 1) - // First 4 bytes insignificant, only calculate digest of second 4 bytes - if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold) { - fragColor = vec4( - float(x_index + 1u)/255.0, // +1 to distinguish from 0 (unsuccessful) pixels - float(y_index + 1u)/255.0, // Same as previous - float(x_pos)/255.0, // Return the 2 custom bytes used in work value - float(y_pos)/255.0 // Second custom byte - ); - } else { - discard; - } -}` - /** Used to set canvas size. Must be a multiple of 256. */ static #WORKLOAD: number = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency)) @@ -292,14 +70,14 @@ void main() { this.#vertexShader = this.#gl.createShader(this.#gl.VERTEX_SHADER) if (this.#vertexShader == null) throw new Error('Failed to create vertex shader') - this.#gl.shaderSource(this.#vertexShader, this.#vsSource) + this.#gl.shaderSource(this.#vertexShader, NanoPowGlVertexShader) this.#gl.compileShader(this.#vertexShader) if (!this.#gl.getShaderParameter(this.#vertexShader, this.#gl.COMPILE_STATUS)) throw new Error(this.#gl.getShaderInfoLog(this.#vertexShader) ?? `Failed to compile vertex shader`) this.#fragmentShader = this.#gl.createShader(this.#gl.FRAGMENT_SHADER) if (this.#fragmentShader == null) throw new Error('Failed to create fragment shader') - this.#gl.shaderSource(this.#fragmentShader, this.#fsSource) + this.#gl.shaderSource(this.#fragmentShader, NanoPowGlFragmentShader) this.#gl.compileShader(this.#fragmentShader) if (!this.#gl.getShaderParameter(this.#fragmentShader, this.#gl.COMPILE_STATUS)) throw new Error(this.#gl.getShaderInfoLog(this.#fragmentShader) ?? `Failed to compile fragment shader`) @@ -444,6 +222,8 @@ void main() { } export default ` + const NanoPowGlFragmentShader = ${NanoPowGlFragmentShader} + const NanoPowGlVertexShader = ${NanoPowGlVertexShader} const WorkerInterface = ${WorkerInterface} const PowGl = ${PowGl} ` diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts deleted file mode 100644 index affc177..0000000 --- a/src/lib/workers/powgpu.ts +++ /dev/null @@ -1,566 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Chris Duncan -// SPDX-License-Identifier: GPL-3.0-or-later -// BLAKE2b hashing implementation derived from nano-webgl-pow by Ben Green (https://github.com/numtel/nano-webgl-pow) -/// -import { WorkerInterface } from '../pool.js' - -/** -* Nano proof-of-work using WebGPU. -*/ -export class PowGpu extends WorkerInterface { - static { - PowGpu.listen() - } - - /** - * Calculates proof-of-work as described by the Nano cryptocurrency protocol. 
- * - * @param {any[]} data - Array of hashes and minimum thresholds - * @returns Promise for proof-of-work attached to original array objects - */ - static async work (data: any[]): Promise { - return new Promise(async (resolve, reject): Promise => { - for (const d of data) { - try { - d.work = await this.search(d.hash, d.threshold) - } catch (err) { - reject(err) - } - } - resolve(data) - }) - } - - // WebGPU Compute Shader - static shader = ` - struct UBO { - blockhash: array, 2>, - random: u32, - threshold: u32 - }; - @group(0) @binding(0) var ubo: UBO; - - struct WORK { - nonce: vec2, - found: atomic - }; - @group(0) @binding(1) var work: WORK; - - /** - * Defined separately from uint v[32] below as the original value is required - * to calculate the second uint32 of the digest for threshold comparison - */ - const BLAKE2B_IV32_1: u32 = 0x6A09E667u; - - /** - * These are offsets into the input data buffer for each mixing step. - * They are multiplied by 2 from the original SIGMA values in - * the C reference implementation, which refered to uint64s. - * - * const SIGMA82: array = array( - * 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - * 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u, - * 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u, - * 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u, - * 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u, - * 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u, - * 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u, - * 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u, - * 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u, - * 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u, - * 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u, - * 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u - * ); - */ - - /** - * G Mixing function - */ - fn G ( - va0: ptr, va1: ptr, - vb0: ptr, vb1: ptr, - vc0: ptr, vc1: ptr, - vd0: ptr, vd1: ptr, - mx0: u32, mx1: u32, my0: u32, my1: u32 - ) { - var o0: u32; - var o1: u32; - var xor0: u32; - var xor1: u32; - - // a = a + b; - o0 = *va0 + *vb0; - o1 = *va1 + *vb1; - if (*va0 > 0xFFFFFFFFu - *vb0) { - o1 = o1 + 1u; - } - *va0 = o0; - *va1 = o1; - - // a = a + m[sigma[r][2*i+0]]; - o0 = *va0 + mx0; - o1 = *va1 + mx1; - if (*va0 > 0xFFFFFFFFu - mx0) { - o1 = o1 + 1u; - } - *va0 = o0; - *va1 = o1; - - // d = rotr64(d ^ a, 32); - xor0 = *vd0 ^ *va0; - xor1 = *vd1 ^ *va1; - *vd0 = xor1; - *vd1 = xor0; - - // c = c + d; - o0 = *vc0 + *vd0; - o1 = *vc1 + *vd1; - if (*vc0 > 0xFFFFFFFFu - *vd0) { - o1 = o1 + 1u; - } - *vc0 = o0; - *vc1 = o1; - - // b = rotr64(b ^ c, 24); - xor0 = *vb0 ^ *vc0; - xor1 = *vb1 ^ *vc1; - *vb0 = (xor0 >> 24u) ^ (xor1 << 8u); - *vb1 = (xor1 >> 24u) ^ (xor0 << 8u); - - // a = a + b; - o0 = *va0 + *vb0; - o1 = *va1 + *vb1; - if (*va0 > 0xFFFFFFFFu - *vb0) { - o1 = o1 + 1u; - } - *va0 = o0; - *va1 = o1; - - // a = a + m[sigma[r][2*i+1]]; - o0 = *va0 + my0; - o1 = *va1 + my1; - if (*va0 > 0xFFFFFFFFu - my0) { - o1 = o1 + 1u; - } - *va0 = o0; - *va1 = o1; - - // d = rotr64(d ^ a, 16) - xor0 = *vd0 ^ *va0; - xor1 = *vd1 ^ *va1; - *vd0 = (xor0 >> 16u) ^ (xor1 << 16u); - *vd1 = (xor1 >> 16u) ^ (xor0 << 16u); - - // c = c + d; - o0 = *vc0 + *vd0; - o1 = *vc1 + *vd1; - if (*vc0 > 0xFFFFFFFFu - *vd0) { - o1 = o1 + 1u; - } - *vc0 = o0; - *vc1 = o1; - - // b = rotr64(b ^ c, 63) - xor0 = *vb0 ^ *vc0; - xor1 = *vb1 ^ *vc1; - *vb0 = (xor1 >> 31u) ^ (xor0 << 1u); - *vb1 = (xor0 >> 31u) ^ (xor1 << 1u); - 
} - - /** - * Main compute function - * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from - * UBO. High 4 bytes are the random value XOR'd with index of each thread. - */ - @compute @workgroup_size(64) - fn main( - @builtin(workgroup_id) workgroup_id: vec3, - @builtin(local_invocation_id) local_id: vec3 - ) { - if (atomicLoad(&work.found) != 0u) { return; } - - let threshold: u32 = ubo.threshold; - - /** - * Flatten 3D workgroup and local identifiers into u32 for each thread - */ - var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) | - ((workgroup_id.y & 0xFFu) << 16u) | - ((workgroup_id.z & 0xFFu) << 8u) | - (local_id.x & 0xFFu); - - /** - * Initialize (nonce||blockhash) concatenation - */ - var m0: u32 = ubo.random; - var m1: u32 = ubo.random ^ id; - var m2: u32 = ubo.blockhash[0u].x; - var m3: u32 = ubo.blockhash[0u].y; - var m4: u32 = ubo.blockhash[0u].z; - var m5: u32 = ubo.blockhash[0u].w; - var m6: u32 = ubo.blockhash[1u].x; - var m7: u32 = ubo.blockhash[1u].y; - var m8: u32 = ubo.blockhash[1u].z; - var m9: u32 = ubo.blockhash[1u].w; - - /** - * Compression buffer intialized to 2 instances of initialization vector - * The following values have been modified from the BLAKE2B_IV: - * OUTLEN is constant 8 bytes - * v[0u] ^= 0x01010000u ^ uint(OUTLEN); - * INLEN is constant 40 bytes: work value (8) + block hash (32) - * v[24u] ^= uint(INLEN); - * It is always the "last" compression at this INLEN - * v[28u] = ~v[28u]; - * v[29u] = ~v[29u]; - */ - var v0: u32 = 0xF2BDC900u; - var v1: u32 = 0x6A09E667u; - var v2: u32 = 0x84CAA73Bu; - var v3: u32 = 0xBB67AE85u; - var v4: u32 = 0xFE94F82Bu; - var v5: u32 = 0x3C6EF372u; - var v6: u32 = 0x5F1D36F1u; - var v7: u32 = 0xA54FF53Au; - var v8: u32 = 0xADE682D1u; - var v9: u32 = 0x510E527Fu; - var v10: u32 = 0x2B3E6C1Fu; - var v11: u32 = 0x9B05688Cu; - var v12: u32 = 0xFB41BD6Bu; - var v13: u32 = 0x1F83D9ABu; - var v14: u32 = 0x137E2179u; - var v15: u32 = 0x5BE0CD19u; - var v16: u32 = 0xF3BCC908u; - var v17: u32 = 0x6A09E667u; - var v18: u32 = 0x84CAA73Bu; - var v19: u32 = 0xBB67AE85u; - var v20: u32 = 0xFE94F82Bu; - var v21: u32 = 0x3C6EF372u; - var v22: u32 = 0x5F1D36F1u; - var v23: u32 = 0xA54FF53Au; - var v24: u32 = 0xADE682F9u; - var v25: u32 = 0x510E527Fu; - var v26: u32 = 0x2B3E6C1Fu; - var v27: u32 = 0x9B05688Cu; - var v28: u32 = 0x04BE4294u; - var v29: u32 = 0xE07C2654u; - var v30: u32 = 0x137E2179u; - var v31: u32 = 0x5BE0CD19u; - - /** - * Twelve rounds of mixing as part of BLAKE2b compression step - */ - // ROUND(0) - G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3); - G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7); - G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u); - G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); - G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u); - G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u); - G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u); - G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u); - - // ROUND(1) - G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u); - G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u); - G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u); - G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u); - G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u); - G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5); - G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u); - 
-
- /**
- * Twelve rounds of mixing as part of BLAKE2b compression step
- */
- // ROUND(0)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(1)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
-
- // ROUND(2)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m0, m1);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m4, m5);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m6, m7, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m2, m3);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m8, m9);
-
- // ROUND(3)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m6, m7, m2, m3);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m4, m5, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m8, m9, m0, m1);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(4)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m0, m1);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m4, m5, m8, m9);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m2, m3);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m6, m7, 0u, 0u);
-
- // ROUND(5)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m4, m5, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m0, m1, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, m6, m7);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m8, m9, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m2, m3, 0u, 0u);
-
- // ROUND(6)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m2, m3, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m8, m9, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m0, m1, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m6, m7);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m4, m5);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(7)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m2, m3);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m6, m7, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m0, m1);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m8, m9);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m4, m5, 0u, 0u);
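The zero arguments in these unrolled rounds follow from SIGMA82 quoted earlier: the 40-byte input fills only m0 through m9, five of the sixteen 64-bit message words, so any schedule entry of 10 or higher becomes a 0u literal. A hypothetical TypeScript generator (not part of the build) expanding one round:

    // Expand SIGMA82 row r into the eight G calls of one round. Entries are
    // pre-doubled u32 indices; pairs starting at 10 or above are zero words.
    const args = (s: number): [string, string] =>
      s <= 8 ? [`m${s}`, `m${s + 1}`] : ['0u', '0u']
    function expandRound (SIGMA82: number[], r: number): string[] {
      const calls: string[] = []
      for (let i = 0; i < 8; i++) {
        const [x0, x1] = args(SIGMA82[r * 16 + 2 * i])
        const [y0, y1] = args(SIGMA82[r * 16 + 2 * i + 1])
        calls.push(`G(..., ${x0}, ${x1}, ${y0}, ${y1});`)
      }
      return calls
    }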
-
- // ROUND(8)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m6, m7);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m0, m1, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m4, m5);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m2, m3, m8, m9);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(9)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m4, m5);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m8, m9);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m2, m3, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m6, m7, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m0, m1);
-
- // ROUND(10)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(11)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
-
- /**
- * Set nonce if it passes the threshold and no other thread has set it
- */
- if ((BLAKE2B_IV32_1 ^ v1 ^ v17) > threshold && atomicLoad(&work.found) == 0u) {
- atomicStore(&work.found, 1u);
- work.nonce.x = m0;
- work.nonce.y = m1;
- }
- return;
- }
- `;
-
- // Initialize WebGPU
- static #device: GPUDevice | null = null
- static #uboBuffer: GPUBuffer
- static #gpuBuffer: GPUBuffer
- static #cpuBuffer: GPUBuffer
- static #bindGroupLayout: GPUBindGroupLayout
- static #pipeline: GPUComputePipeline
-
- static {
- this.init()
- }
-
- // Initialize WebGPU
- static init () {
- // Request adapter and device
- if (navigator.gpu == null) {
- throw new Error('WebGPU is not supported in this browser.')
- }
- navigator.gpu.requestAdapter()
- .then(adapter => {
- if (adapter == null) {
- throw new Error('WebGPU adapter refused by browser.')
- }
- adapter.requestDevice()
- .then(device => {
- this.#device = device
- this.#device.lost.then(loss => {
- console.warn(`(${loss.reason}) ${loss.message}. Reinitializing device.`)
- this.init()
- })
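The buffer sizes below follow from the WGSL declarations. A layout sketch, with offsets inferred from the DataView writes in search() further down (std140 rounds each struct up to a 16-byte multiple):

    // UBO, 48 bytes total:
    //   bytes  0..31  blockhash: array<vec4<u32>, 2>  (big-endian hash words)
    //   bytes 32..35  random: u32                     (little-endian)
    //   bytes 36..39  threshold: u32                  (little-endian)
    //   bytes 40..47  padding
    // WORK, 16 bytes total:
    //   bytes  0..7   nonce: vec2<u32>
    //   bytes  8..11  found: atomic<u32>
    //   bytes 12..15  padding (only the first 12 bytes are copied back)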
-
- // Create buffers for writing GPU calculations and reading from Javascript
- this.#uboBuffer = this.#device.createBuffer({
- size: 48,
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
- })
- this.#gpuBuffer = this.#device.createBuffer({
- size: 16,
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
- })
- this.#cpuBuffer = this.#device.createBuffer({
- size: 16,
- usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
- })
-
- // Create binding group data structure and use it later once UBO is known
- this.#bindGroupLayout = this.#device.createBindGroupLayout({
- entries: [
- {
- binding: 0,
- visibility: GPUShaderStage.COMPUTE,
- buffer: {
- type: 'uniform'
- },
- },
- {
- binding: 1,
- visibility: GPUShaderStage.COMPUTE,
- buffer: {
- type: 'storage'
- },
- }
- ]
- })
-
- // Create pipeline to connect compute shader to binding layout
- this.#pipeline = this.#device.createComputePipeline({
- layout: this.#device.createPipelineLayout({
- bindGroupLayouts: [this.#bindGroupLayout]
- }),
- compute: {
- entryPoint: 'main',
- module: this.#device.createShaderModule({
- code: this.shader
- })
- }
- })
- })
- })
- .catch(err => { throw new Error(err.message) })
- }
-
- /**
- * Finds a nonce that satisfies the Nano proof-of-work requirements.
- *
- * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
- * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
- */
- static async search (hash: string, threshold: number = 0xfffffff8): Promise<string> {
- if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new TypeError(`Invalid hash ${hash}`)
- if (typeof threshold !== 'number') throw new TypeError(`Invalid threshold ${threshold}`)
-
- // Ensure WebGPU is initialized before calculating, up to a max time frame
- while (this.#device == null && performance.now() < 8000) {
- await new Promise(resolve => {
- setTimeout(resolve, 500)
- })
- }
- if (this.#device == null) throw new Error(`WebGPU device failed to load.`)
-
- // Set up uniform buffer object
- // Note: u32 size is 4, but total alignment must be multiple of 16
- const uboView = new DataView(new ArrayBuffer(48))
- for (let i = 0; i < 64; i += 8) {
- const uint32 = hash.slice(i, i + 8)
- uboView.setUint32(i / 2, parseInt(uint32, 16))
- }
- const random = Math.floor((Math.random() * 0xffffffff))
- uboView.setUint32(32, random, true)
- uboView.setUint32(36, threshold, true)
- this.#device.queue.writeBuffer(this.#uboBuffer, 0, uboView)
-
- // Reset `found` flag to 0u in WORK before each calculation
- this.#device.queue.writeBuffer(this.#gpuBuffer, 8, new Uint32Array([0]))
-
- // Bind UBO read and GPU write buffers
- const bindGroup = this.#device.createBindGroup({
- layout: this.#bindGroupLayout,
- entries: [
- {
- binding: 0,
- resource: {
- buffer: this.#uboBuffer
- },
- },
- {
- binding: 1,
- resource: {
- buffer: this.#gpuBuffer
- },
- },
- ],
- })
-
- // Create command encoder to issue commands to GPU and initiate computation
- const commandEncoder = this.#device.createCommandEncoder()
- const passEncoder = commandEncoder.beginComputePass()
-
- // Issue commands and end compute pass structure
- passEncoder.setPipeline(this.#pipeline)
- passEncoder.setBindGroup(0, bindGroup)
- passEncoder.dispatchWorkgroups(256, 256, 256)
- passEncoder.end()
-
- // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
- commandEncoder.copyBufferToBuffer(
- this.#gpuBuffer,
- 0,
- this.#cpuBuffer,
- 0,
- 12
- )
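A back-of-envelope check on the dispatch above (illustrative arithmetic, not from the source):

    // 256^3 workgroups x 64 invocations per workgroup:
    const noncesPerDispatch = 256 * 256 * 256 * 64            // 1_073_741_824, about 2^30
    // The digest's high word must exceed 0xfffffff8: 7 passing values out of 2^32
    const expectedTries = 2 ** 32 / (0xffffffff - 0xfffffff8) // about 6.1e8
    console.log(expectedTries < noncesPerDispatch)            // true: one pass usually suffices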
-
- // End computation by passing array of command buffers to command queue for execution
- this.#device.queue.submit([commandEncoder.finish()])
-
- // Read results back to Javascript and then unmap buffer after reading
- try {
- await this.#cpuBuffer.mapAsync(GPUMapMode.READ)
- await this.#device.queue.onSubmittedWorkDone()
- } catch (err) {
- console.warn(`Error getting data from GPU, retrying. ${err}`)
- return await this.search(hash, threshold)
- }
- const data = new DataView(this.#cpuBuffer.getMappedRange())
- const nonce = data.getBigUint64(0, true)
- const found = !!data.getUint32(8)
- this.#cpuBuffer.unmap()
-
- if (found) {
- const hex = nonce.toString(16).padStart(16, '0')
- return hex
- } else {
- return await this.search(hash, threshold)
- }
- }
-}
-
-export default `
- const WorkerInterface = ${WorkerInterface}
- const PowGpu = ${PowGpu}
-`
diff --git a/src/main.ts b/src/main.ts
index fc8e66d..f728b22 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -5,11 +5,11 @@ import { Account } from './lib/account.js'
 import { Blake2b } from './lib/blake2b.js'
 import { SendBlock, ReceiveBlock, ChangeBlock } from './lib/block.js'
 import { PowGl } from './lib/workers/powgl.js'
-import { PowGpu } from './lib/workers/powgpu.js'
+import { NanoPowGpu } from './lib/workers/nano-pow.js'
 import { Rpc } from './lib/rpc.js'
 import { Rolodex } from './lib/rolodex.js'
 import { Safe } from './lib/safe.js'
 import { Tools } from './lib/tools.js'
 import { Bip44Wallet, Blake2bWallet, LedgerWallet } from './lib/wallet.js'
 
-export { Account, Blake2b, SendBlock, ReceiveBlock, ChangeBlock, PowGl, PowGpu, Rpc, Rolodex, Safe, Tools, Bip44Wallet, Blake2bWallet, LedgerWallet }
+export { Account, Blake2b, SendBlock, ReceiveBlock, ChangeBlock, PowGl, NanoPowGpu as PowGpu, Rpc, Rolodex, Safe, Tools, Bip44Wallet, Blake2bWallet, LedgerWallet }
-- 
2.34.1
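With the alias, downstream consumers of PowGpu keep working unchanged. A hedged usage sketch, assuming the new NanoPowGpu preserves the deleted PowGpu surface (a static async search(hash, threshold?) resolving to an 8-byte hex nonce):

    // Assumed API: search() validates a 64-hex-character hash and returns
    // a 16-hex-character work value once the GPU finds a passing nonce.
    import { NanoPowGpu } from './lib/workers/nano-pow.js'
    const hash = '0000000000000000000000000000000000000000000000000000000000000000' // placeholder
    const work = await NanoPowGpu.search(hash, 0xfffffff8)
    console.log(work) // e.g. 'c3f097857cc7106b' (illustrative output)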