console.log(`Maximum: ${max} ms`)
})
- await test(`nano-webgl-pow: Time to calculate proof-of-work for a send block ${COUNT} times`, async () => {
+ await skip(`nano-webgl-pow: Time to calculate proof-of-work for a send block ${COUNT} times`, async () => {
//@ts-expect-error
window.NanoWebglPow.width = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency))
//@ts-expect-error
import { NanoNaCl } from './workers/nano-nacl.js'
import { Pool } from './pool.js'
import { Rpc } from './rpc.js'
-import { PowGl, PowGpu } from './workers.js'
+import { PowGl, NanoPowGpu } from './workers.js'
/**
* Represents a block as defined by the Nano cryptocurrency protocol. The Block
* of three derived classes: SendBlock, ReceiveBlock, ChangeBlock.
*/
abstract class Block {
- static #pool: Pool = new Pool(PowGpu)
+ static #pool: Pool = new Pool(NanoPowGpu)
account: Account
type: string = 'state'
abstract subtype: 'send' | 'receive' | 'change'
import { default as Bip44Ckd } from './workers/bip44-ckd.js'
import { default as NanoNaCl } from './workers/nano-nacl.js'
import { default as PowGl } from './workers/powgl.js'
-import { default as PowGpu } from './workers/powgpu.js'
+import { default as NanoPowGpu } from './workers/nano-pow.js'
-export { Bip44Ckd, NanoNaCl, PowGl, PowGpu }
+export { Bip44Ckd, NanoNaCl, PowGl, NanoPowGpu }
--- /dev/null
+export const NanoPowGlVertexShader = `#version 300 es
+#pragma vscode_glsllint_stage: vert
+precision highp float;
+layout (location=0) in vec4 position;
+layout (location=1) in vec2 uv;
+
+out vec2 uv_pos;
+
+void main() {
+ uv_pos = uv;
+ gl_Position = position;
+}
+`
+
+export const NanoPowGlFragmentShader = `#version 300 es
+#pragma vscode_glsllint_stage: frag
+precision highp float;
+precision highp int;
+
+in vec2 uv_pos;
+out vec4 fragColor;
+
+// blockhash - array of precalculated block hash components
+// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else
+// workload - Defines canvas size
+layout(std140) uniform UBO {
+ uint blockhash[8];
+ uint threshold;
+ float workload;
+};
+
+// Random work values
+// First 2 bytes will be overwritten by texture pixel position
+// Second 2 bytes will be modified if the canvas size is greater than 256x256
+// Last 4 bytes remain as generated externally
+layout(std140) uniform WORK {
+ uvec4 work[2];
+};
+
+// Defined separately from uint v[32] below as the original value is required
+// to calculate the second uint32 of the digest for threshold comparison
+const uint BLAKE2B_IV32_1 = 0x6A09E667u;
+
+// Both buffers represent 16 uint64s as 32 uint32s
+// because that's what GLSL offers, just like Javascript
+
+// Compression buffer, intialized to 2 instances of the initialization vector
+// The following values have been modified from the BLAKE2B_IV:
+// OUTLEN is constant 8 bytes
+// v[0] ^= 0x01010000u ^ uint(OUTLEN);
+// INLEN is constant 40 bytes: work value (8) + block hash (32)
+// v[24] ^= uint(INLEN);
+// It's always the "last" compression at this INLEN
+// v[28] = ~v[28];
+// v[29] = ~v[29];
+uint v[32] = uint[32](
+ 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+ 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+ 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+ 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+ 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+ 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+ 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+ 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+);
+// Input data buffer
+uint m[32];
+
+// These are offsets into the input data buffer for each mixing step.
+// They are multiplied by 2 from the original SIGMA values in
+// the C reference implementation, which refered to uint64s.
+const uint SIGMA82[192] = uint[192](
+ 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+ 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+ 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+ 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+ 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+ 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+ 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+ 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+ 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+ 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+ 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+ 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+);
+
+// G mixing function
+void G (uint ix, uint iy, uint a, uint b, uint c, uint d) {
+ uint o0;
+ uint o1;
+ uint xor0;
+ uint xor1;
+
+ // a = a + b;
+ o0 = v[a] + v[b];
+ o1 = v[a+1u] + v[b+1u];
+ if (v[a] > 0xFFFFFFFFu - v[b]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
+
+ // a = a + m[sigma[r][2*i+0]];
+ o0 = v[a] + m[ix];
+ o1 = v[a+1u] + m[ix+1u];
+ if (v[a] > 0xFFFFFFFFu - m[ix]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
+
+ // d = rotr64(d ^ a, 32);
+ xor0 = v[d] ^ v[a];
+ xor1 = v[d+1u] ^ v[a+1u];
+ v[d] = xor1;
+ v[d+1u] = xor0;
+
+ // c = c + d;
+ o0 = v[c] + v[d];
+ o1 = v[c+1u] + v[d+1u];
+ if (v[c] > 0xFFFFFFFFu - v[d]) {
+ o1 = o1 + 1u;
+ }
+ v[c] = o0;
+ v[c+1u] = o1;
+
+ // b = rotr64(b ^ c, 24);
+ xor0 = v[b] ^ v[c];
+ xor1 = v[b+1u] ^ v[c+1u];
+ v[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+ v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
+
+ // a = a + b;
+ o0 = v[a] + v[b];
+ o1 = v[a+1u] + v[b+1u];
+ if (v[a] > 0xFFFFFFFFu - v[b]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
+
+ // a = a + m[sigma[r][2*i+1]];
+ o0 = v[a] + m[iy];
+ o1 = v[a+1u] + m[iy+1u];
+ if (v[a] > 0xFFFFFFFFu - m[iy]) {
+ o1 = o1 + 1u;
+ }
+ v[a] = o0;
+ v[a+1u] = o1;
+
+ // d = rotr64(d ^ a, 16)
+ xor0 = v[d] ^ v[a];
+ xor1 = v[d+1u] ^ v[a+1u];
+ v[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+ v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
+
+ // c = c + d;
+ o0 = v[c] + v[d];
+ o1 = v[c+1u] + v[d+1u];
+ if (v[c] > 0xFFFFFFFFu - v[d]) {
+ o1 = o1 + 1u;
+ }
+ v[c] = o0;
+ v[c+1u] = o1;
+
+ // b = rotr64(b ^ c, 63)
+ xor0 = v[b] ^ v[c];
+ xor1 = v[b+1u] ^ v[c+1u];
+ v[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+ v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+void main() {
+ int i;
+ uvec4 u_work0 = work[0u];
+ uvec4 u_work1 = work[1u];
+ uint uv_x = uint(uv_pos.x * workload);
+ uint uv_y = uint(uv_pos.y * workload);
+ uint x_pos = uv_x % 256u;
+ uint y_pos = uv_y % 256u;
+ uint x_index = (uv_x - x_pos) / 256u;
+ uint y_index = (uv_y - y_pos) / 256u;
+
+ // First 2 work bytes are the x,y pos within the 256x256 area, the next
+ // two bytes are modified from the random generated value, XOR'd with
+ // the x,y area index of where this pixel is located
+ m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u));
+
+ // Remaining bytes are un-modified from the random generated value
+ m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u));
+
+ // Block hash
+ for (uint i = 0u; i < 8u; i = i + 1u) {
+ m[i+2u] = blockhash[i];
+ }
+
+ // twelve rounds of mixing
+ for(uint i = 0u; i < 12u; i = i + 1u) {
+ G(SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u], 0u, 8u, 16u, 24u);
+ G(SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u], 2u, 10u, 18u, 26u);
+ G(SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u], 4u, 12u, 20u, 28u);
+ G(SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u], 6u, 14u, 22u, 30u);
+ G(SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u], 0u, 10u, 20u, 30u);
+ G(SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u], 2u, 12u, 22u, 24u);
+ G(SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u], 4u, 14u, 16u, 26u);
+ G(SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u], 6u, 8u, 18u, 28u);
+ }
+
+ // Pixel data is multipled by threshold test result (0 or 1)
+ // First 4 bytes insignificant, only calculate digest of second 4 bytes
+ if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold) {
+ fragColor = vec4(
+ float(x_index + 1u)/255.0, // +1 to distinguish from 0 (unsuccessful) pixels
+ float(y_index + 1u)/255.0, // Same as previous
+ float(x_pos)/255.0, // Return the 2 custom bytes used in work value
+ float(y_pos)/255.0 // Second custom byte
+ );
+ } else {
+ discard;
+ }
+}
+`
+
+export const NanoPowGpuComputeShader = `
+struct UBO {
+ blockhash: array<vec4<u32>, 2>,
+ random: u32,
+ threshold: u32
+};
+@group(0) @binding(0) var<uniform> ubo: UBO;
+
+struct WORK {
+ nonce: vec2<u32>,
+ found: atomic<u32>
+};
+@group(0) @binding(1) var<storage, read_write> work: WORK;
+
+/**
+* Defined separately from uint v[32] below as the original value is required
+* to calculate the second uint32 of the digest for threshold comparison
+*/
+const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+
+/**
+* These are offsets into the input data buffer for each mixing step.
+* They are multiplied by 2 from the original SIGMA values in
+* the C reference implementation, which refered to uint64s.
+*
+* const SIGMA82: array<u32, 192> = array<u32, 192>(
+* 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+* 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+* 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+* 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+* 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+* 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+* 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+* 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+* 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+* 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+* 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+* 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+* );
+*/
+
+/**
+* G Mixing function
+*/
+fn G (
+ va0: ptr<function, u32>, va1: ptr<function, u32>,
+ vb0: ptr<function, u32>, vb1: ptr<function, u32>,
+ vc0: ptr<function, u32>, vc1: ptr<function, u32>,
+ vd0: ptr<function, u32>, vd1: ptr<function, u32>,
+ mx0: u32, mx1: u32, my0: u32, my1: u32
+) {
+ var o0: u32;
+ var o1: u32;
+ var xor0: u32;
+ var xor1: u32;
+
+ // a = a + b;
+ o0 = *va0 + *vb0;
+ o1 = *va1 + *vb1;
+ if (*va0 > 0xFFFFFFFFu - *vb0) {
+ o1 = o1 + 1u;
+ }
+ *va0 = o0;
+ *va1 = o1;
+
+ // a = a + m[sigma[r][2*i+0]];
+ o0 = *va0 + mx0;
+ o1 = *va1 + mx1;
+ if (*va0 > 0xFFFFFFFFu - mx0) {
+ o1 = o1 + 1u;
+ }
+ *va0 = o0;
+ *va1 = o1;
+
+ // d = rotr64(d ^ a, 32);
+ xor0 = *vd0 ^ *va0;
+ xor1 = *vd1 ^ *va1;
+ *vd0 = xor1;
+ *vd1 = xor0;
+
+ // c = c + d;
+ o0 = *vc0 + *vd0;
+ o1 = *vc1 + *vd1;
+ if (*vc0 > 0xFFFFFFFFu - *vd0) {
+ o1 = o1 + 1u;
+ }
+ *vc0 = o0;
+ *vc1 = o1;
+
+ // b = rotr64(b ^ c, 24);
+ xor0 = *vb0 ^ *vc0;
+ xor1 = *vb1 ^ *vc1;
+ *vb0 = (xor0 >> 24u) ^ (xor1 << 8u);
+ *vb1 = (xor1 >> 24u) ^ (xor0 << 8u);
+
+ // a = a + b;
+ o0 = *va0 + *vb0;
+ o1 = *va1 + *vb1;
+ if (*va0 > 0xFFFFFFFFu - *vb0) {
+ o1 = o1 + 1u;
+ }
+ *va0 = o0;
+ *va1 = o1;
+
+ // a = a + m[sigma[r][2*i+1]];
+ o0 = *va0 + my0;
+ o1 = *va1 + my1;
+ if (*va0 > 0xFFFFFFFFu - my0) {
+ o1 = o1 + 1u;
+ }
+ *va0 = o0;
+ *va1 = o1;
+
+ // d = rotr64(d ^ a, 16)
+ xor0 = *vd0 ^ *va0;
+ xor1 = *vd1 ^ *va1;
+ *vd0 = (xor0 >> 16u) ^ (xor1 << 16u);
+ *vd1 = (xor1 >> 16u) ^ (xor0 << 16u);
+
+ // c = c + d;
+ o0 = *vc0 + *vd0;
+ o1 = *vc1 + *vd1;
+ if (*vc0 > 0xFFFFFFFFu - *vd0) {
+ o1 = o1 + 1u;
+ }
+ *vc0 = o0;
+ *vc1 = o1;
+
+ // b = rotr64(b ^ c, 63)
+ xor0 = *vb0 ^ *vc0;
+ xor1 = *vb1 ^ *vc1;
+ *vb0 = (xor1 >> 31u) ^ (xor0 << 1u);
+ *vb1 = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+/**
+* Main compute function
+* 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
+* UBO. High 4 bytes are the random value XOR'd with index of each thread.
+*/
+@compute @workgroup_size(64)
+fn main(
+ @builtin(workgroup_id) workgroup_id: vec3<u32>,
+ @builtin(local_invocation_id) local_id: vec3<u32>
+) {
+ if (atomicLoad(&work.found) != 0u) { return; }
+
+ let threshold: u32 = ubo.threshold;
+
+ /**
+ * Flatten 3D workgroup and local identifiers into u32 for each thread
+ */
+ var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
+ ((workgroup_id.y & 0xFFu) << 16u) |
+ ((workgroup_id.z & 0xFFu) << 8u) |
+ (local_id.x & 0xFFu);
+
+ /**
+ * Initialize (nonce||blockhash) concatenation
+ */
+ var m0: u32 = ubo.random;
+ var m1: u32 = ubo.random ^ id;
+ var m2: u32 = ubo.blockhash[0u].x;
+ var m3: u32 = ubo.blockhash[0u].y;
+ var m4: u32 = ubo.blockhash[0u].z;
+ var m5: u32 = ubo.blockhash[0u].w;
+ var m6: u32 = ubo.blockhash[1u].x;
+ var m7: u32 = ubo.blockhash[1u].y;
+ var m8: u32 = ubo.blockhash[1u].z;
+ var m9: u32 = ubo.blockhash[1u].w;
+
+ /**
+ * Compression buffer intialized to 2 instances of initialization vector
+ * The following values have been modified from the BLAKE2B_IV:
+ * OUTLEN is constant 8 bytes
+ * v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+ * INLEN is constant 40 bytes: work value (8) + block hash (32)
+ * v[24u] ^= uint(INLEN);
+ * It is always the "last" compression at this INLEN
+ * v[28u] = ~v[28u];
+ * v[29u] = ~v[29u];
+ */
+ var v0: u32 = 0xF2BDC900u;
+ var v1: u32 = 0x6A09E667u;
+ var v2: u32 = 0x84CAA73Bu;
+ var v3: u32 = 0xBB67AE85u;
+ var v4: u32 = 0xFE94F82Bu;
+ var v5: u32 = 0x3C6EF372u;
+ var v6: u32 = 0x5F1D36F1u;
+ var v7: u32 = 0xA54FF53Au;
+ var v8: u32 = 0xADE682D1u;
+ var v9: u32 = 0x510E527Fu;
+ var v10: u32 = 0x2B3E6C1Fu;
+ var v11: u32 = 0x9B05688Cu;
+ var v12: u32 = 0xFB41BD6Bu;
+ var v13: u32 = 0x1F83D9ABu;
+ var v14: u32 = 0x137E2179u;
+ var v15: u32 = 0x5BE0CD19u;
+ var v16: u32 = 0xF3BCC908u;
+ var v17: u32 = 0x6A09E667u;
+ var v18: u32 = 0x84CAA73Bu;
+ var v19: u32 = 0xBB67AE85u;
+ var v20: u32 = 0xFE94F82Bu;
+ var v21: u32 = 0x3C6EF372u;
+ var v22: u32 = 0x5F1D36F1u;
+ var v23: u32 = 0xA54FF53Au;
+ var v24: u32 = 0xADE682F9u;
+ var v25: u32 = 0x510E527Fu;
+ var v26: u32 = 0x2B3E6C1Fu;
+ var v27: u32 = 0x9B05688Cu;
+ var v28: u32 = 0x04BE4294u;
+ var v29: u32 = 0xE07C2654u;
+ var v30: u32 = 0x137E2179u;
+ var v31: u32 = 0x5BE0CD19u;
+
+ /**
+ * Twelve rounds of mixing as part of BLAKE2b compression step
+ */
+ // ROUND(0)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+ // ROUND(1)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
+
+ // ROUND(2)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m0, m1);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m4, m5);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m6, m7, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m2, m3);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m8, m9);
+
+ // ROUND(3)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m6, m7, m2, m3);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m4, m5, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m8, m9, m0, m1);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+ // ROUND(4)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m0, m1);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m4, m5, m8, m9);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m2, m3);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m6, m7, 0u, 0u);
+
+ // ROUND(5)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m4, m5, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m0, m1, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, m6, m7);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m8, m9, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m2, m3, 0u, 0u);
+
+ // ROUND(6)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m2, m3, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m8, m9, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m0, m1, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m6, m7);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m4, m5);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+ // ROUND(7)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m2, m3);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m6, m7, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m0, m1);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m8, m9);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m4, m5, 0u, 0u);
+
+ // ROUND(8)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m6, m7);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m0, m1, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m4, m5);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m2, m3, m8, m9);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+ // ROUND(9)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m4, m5);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m8, m9);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m2, m3, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m6, m7, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m0, m1);
+
+ // ROUND(10)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
+
+ // ROUND(11)
+ G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
+ G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
+ G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
+ G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
+ G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
+ G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
+ G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
+
+ /**
+ * Set nonce if it passes the threshold and no other thread has set it
+ */
+ if ((BLAKE2B_IV32_1 ^ v1 ^ v17) > threshold && atomicLoad(&work.found) == 0u) {
+ atomicStore(&work.found, 1u);
+ work.nonce.x = m0;
+ work.nonce.y = m1;
+ }
+ return;
+}
+`
+
+
+
+/**
+* The original NanoPow compute shader derived from nano-webgl-pow is saved in
+* this comment for reference purposes. It is not quite as performant as the
+* inlined version exported above.
+*/
+
+/*
+struct UBO {
+ blockhash: array<vec4<u32>, 2>,
+ random: u32,
+ threshold: u32
+};
+@group(0) @binding(0) var<uniform> ubo: UBO;
+
+struct WORK {
+ nonce: vec2<u32>,
+ found: atomic<u32>
+};
+@group(0) @binding(1) var<storage, read_write> work: WORK;
+
+// Defined separately from uint v[32] below as the original value is required
+// to calculate the second uint32 of the digest for threshold comparison
+const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+
+// These are offsets into the input data buffer for each mixing step.
+// They are multiplied by 2 from the original SIGMA values in
+// the C reference implementation, which refered to uint64s.
+
+const SIGMA82: array<u32, 192> = array<u32, 192>(
+ 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+ 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
+ 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
+ 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
+ 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
+ 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
+ 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
+ 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
+ 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
+ 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
+ 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
+ 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
+);
+
+// 64-bit unsigned addition within the compression buffer
+// Sets v[i,i+1] += b
+// LSb is the Least-Significant (32) Bits of b
+// MSb is the Most-Significant (32) Bits of b
+// If LSb overflows, increment MSb operand
+fn add_uint64 (v: ptr<function, array<u32, 32>>, i: u32, LSb: u32, MSb: u32) {
+ var o0: u32 = (*v)[i] + LSb;
+ var o1: u32 = (*v)[i+1u] + MSb;
+ if ((*v)[i] > 0xFFFFFFFFu - LSb) {
+ o1 = o1 + 1u;
+ }
+ (*v)[i] = o0;
+ (*v)[i+1u] = o1;
+}
+
+// G Mixing function
+fn G (v: ptr<function, array<u32, 32>>, m: ptr<function, array<u32, 16>>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) {
+ add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+ add_uint64(v, a, (*m)[ix], (*m)[ix+1u]);
+
+ // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
+ var xor0: u32 = (*v)[d] ^ (*v)[a];
+ var xor1: u32 = (*v)[d+1u] ^ (*v)[a+1u];
+ (*v)[d] = xor1;
+ (*v)[d+1u] = xor0;
+
+ add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+ // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
+ xor0 = (*v)[b] ^ (*v)[c];
+ xor1 = (*v)[b+1u] ^ (*v)[c+1u];
+ (*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+ (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
+
+ add_uint64(v, a, (*v)[b], (*v)[b+1u]);
+ add_uint64(v, a, (*m)[iy], (*m)[iy+1u]);
+
+ // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
+ xor0 = (*v)[d] ^ (*v)[a];
+ xor1 = (*v)[d+1u] ^ (*v)[a+1u];
+ (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+ (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
+
+ add_uint64(v, c, (*v)[d], (*v)[d+1u]);
+
+ // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
+ xor0 = (*v)[b] ^ (*v)[c];
+ xor1 = (*v)[b+1u] ^ (*v)[c+1u];
+ (*v)[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+ (*v)[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
+}
+
+// Main compute function
+// 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
+// UBO. High 4 bytes are the random value XOR'd with index of each thread.
+@compute @workgroup_size(${this.workload})
+fn main(
+ @builtin(workgroup_id) workgroup_id: vec3<u32>,
+ @builtin(local_invocation_id) local_id: vec3<u32>
+) {
+ if (atomicLoad(&work.found) != 0u) { return; }
+
+ let threshold: u32 = ubo.threshold;
+
+ // Flatten 3D workgroup and local identifiers into u32 for each thread
+ var id: u32 = ((workgroup_id.x & 0xff) << 24) |
+ ((workgroup_id.y & 0xff) << 16) |
+ ((workgroup_id.z & 0xff) << 8) |
+ (local_id.x & 0xff);
+
+ // Initialize (nonce||blockhash) concatenation
+ var m: array<u32, 16>;
+ m[0u] = ubo.random;
+ m[1u] = ubo.random ^ id;
+ m[2u] = ubo.blockhash[0u].x;
+ m[3u] = ubo.blockhash[0u].y;
+ m[4u] = ubo.blockhash[0u].z;
+ m[5u] = ubo.blockhash[0u].w;
+ m[6u] = ubo.blockhash[1u].x;
+ m[7u] = ubo.blockhash[1u].y;
+ m[8u] = ubo.blockhash[1u].z;
+ m[9u] = ubo.blockhash[1u].w;
+
+
+ // Compression buffer intialized to 2 instances of initialization vector
+ // The following values have been modified from the BLAKE2B_IV:
+ // OUTLEN is constant 8 bytes
+ // v[0u] ^= 0x01010000u ^ uint(OUTLEN);
+ // INLEN is constant 40 bytes: work value (8) + block hash (32)
+ // v[24u] ^= uint(INLEN);
+ // It is always the "last" compression at this INLEN
+ // v[28u] = ~v[28u];
+ // v[29u] = ~v[29u];
+ var v = array<u32, 32>(
+ 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+ 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+ 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+ 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+ 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+ 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+ 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+ 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+ );
+
+ // Twelve rounds of mixing as part of BLAKE2b compression step
+ for (var r: u32 = 0u; r < 12u; r = r + 1u) {
+ G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]);
+ G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]);
+ G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]);
+ G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]);
+ G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]);
+ G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]);
+ G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]);
+ G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]);
+ }
+
+ // Set nonce if it passes the threshold and no other thread has set it
+ if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold && atomicLoad(&work.found) == 0u) {
+ atomicStore(&work.found, 1u);
+ work.nonce.x = m[0];
+ work.nonce.y = m[1];
+ }
+ return;
+}
+*/
--- /dev/null
+// SPDX-FileCopyrightText: 2024 Chris Duncan <chris@zoso.dev>
+// SPDX-License-Identifier: GPL-3.0-or-later
+// BLAKE2b hashing implementation derived from nano-webgl-pow by Ben Green <ben@latenightsketches.com> (https://github.com/numtel/nano-webgl-pow)
+/// <reference types="@webgpu/types" />
+import { WorkerInterface } from '../pool.js'
+import { NanoPowGpuComputeShader } from './nano-pow-shaders.js'
+
+/**
+* Nano proof-of-work using WebGPU.
+*/
+export class NanoPowGpu extends WorkerInterface {
+ static {
+ NanoPowGpu.listen()
+ }
+
+ /**
+ * Calculates proof-of-work as described by the Nano cryptocurrency protocol.
+ *
+ * @param {any[]} data - Array of hashes and minimum thresholds
+ * @returns Promise for proof-of-work attached to original array objects
+ */
+ static async work (data: any[]): Promise<any[]> {
+ return new Promise(async (resolve, reject): Promise<void> => {
+ for (const d of data) {
+ try {
+ d.work = await this.search(d.hash, d.threshold)
+ } catch (err) {
+ reject(err)
+ }
+ }
+ resolve(data)
+ })
+ }
+
+ // Initialize WebGPU
+ static #device: GPUDevice | null = null
+ static #uboBuffer: GPUBuffer
+ static #gpuBuffer: GPUBuffer
+ static #cpuBuffer: GPUBuffer
+ static #bindGroupLayout: GPUBindGroupLayout
+ static #pipeline: GPUComputePipeline
+
+ static {
+ this.init()
+ }
+
+ // Initialize WebGPU
+ static init () {
+ // Request device and adapter
+ if (navigator.gpu == null) {
+ throw new Error('WebGPU is not supported in this browser.')
+ }
+ navigator.gpu.requestAdapter()
+ .then(adapter => {
+ if (adapter == null) {
+ throw new Error('WebGPU adapter refused by browser.')
+ }
+ adapter.requestDevice()
+ .then(device => {
+ this.#device = device
+ this.#device.lost.then(loss => {
+ console.warn(`(${loss.reason}) ${loss.message}. Reinitializing device.`)
+ this.init()
+ })
+
+ // Create buffers for writing GPU calculations and reading from Javascript
+ this.#uboBuffer = this.#device.createBuffer({
+ size: 48,
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
+ })
+ this.#gpuBuffer = this.#device.createBuffer({
+ size: 16,
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
+ })
+ this.#cpuBuffer = this.#device.createBuffer({
+ size: 16,
+ usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
+ })
+
+ // Create binding group data structure and use it later once UBO is known
+ this.#bindGroupLayout = this.#device.createBindGroupLayout({
+ entries: [
+ {
+ binding: 0,
+ visibility: GPUShaderStage.COMPUTE,
+ buffer: {
+ type: 'uniform'
+ },
+ },
+ {
+ binding: 1,
+ visibility: GPUShaderStage.COMPUTE,
+ buffer: {
+ type: 'storage'
+ },
+ }
+ ]
+ })
+
+ // Create pipeline to connect compute shader to binding layout
+ this.#pipeline = this.#device.createComputePipeline({
+ layout: this.#device.createPipelineLayout({
+ bindGroupLayouts: [this.#bindGroupLayout]
+ }),
+ compute: {
+ entryPoint: 'main',
+ module: this.#device.createShaderModule({
+ code: NanoPowGpuComputeShader
+ })
+ }
+ })
+ })
+ })
+ .catch(err => { throw new Error(err.message) })
+ }
+
+ /**
+ * Finds a nonce that satisfies the Nano proof-of-work requirements.
+ *
+ * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
+ * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
+ */
+ static async search (hash: string, threshold: number = 0xfffffff8): Promise<string> {
+ if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new TypeError(`Invalid hash ${hash}`)
+ if (typeof threshold !== 'number') throw new TypeError(`Invalid threshold ${threshold}`)
+
+ // Ensure WebGPU is initialized before calculating, up to a max time frame
+ while (this.#device == null && performance.now() < 8000) {
+ await new Promise(resolve => {
+ setTimeout(resolve, 500)
+ })
+ }
+ if (this.#device == null) throw new Error(`WebGPU device failed to load.`)
+
+ // Set up uniform buffer object
+ // Note: u32 size is 4, but total alignment must be multiple of 16
+ const uboView = new DataView(new ArrayBuffer(48))
+ for (let i = 0; i < 64; i += 8) {
+ const uint32 = hash.slice(i, i + 8)
+ uboView.setUint32(i / 2, parseInt(uint32, 16))
+ }
+ const random = Math.floor((Math.random() * 0xffffffff))
+ uboView.setUint32(32, random, true)
+ uboView.setUint32(36, threshold, true)
+ this.#device.queue.writeBuffer(this.#uboBuffer, 0, uboView)
+
+ // Reset `found` flag to 0u in WORK before each calculation
+ this.#device.queue.writeBuffer(this.#gpuBuffer, 8, new Uint32Array([0]))
+
+ // Bind UBO read and GPU write buffers
+ const bindGroup = this.#device.createBindGroup({
+ layout: this.#bindGroupLayout,
+ entries: [
+ {
+ binding: 0,
+ resource: {
+ buffer: this.#uboBuffer
+ },
+ },
+ {
+ binding: 1,
+ resource: {
+ buffer: this.#gpuBuffer
+ },
+ },
+ ],
+ })
+
+ // Create command encoder to issue commands to GPU and initiate computation
+ const commandEncoder = this.#device.createCommandEncoder()
+ const passEncoder = commandEncoder.beginComputePass()
+
+ // Issue commands and end compute pass structure
+ passEncoder.setPipeline(this.#pipeline)
+ passEncoder.setBindGroup(0, bindGroup)
+ passEncoder.dispatchWorkgroups(256, 256, 256)
+ passEncoder.end()
+
+ // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
+ commandEncoder.copyBufferToBuffer(
+ this.#gpuBuffer,
+ 0,
+ this.#cpuBuffer,
+ 0,
+ 12
+ )
+
+ // End computation by passing array of command buffers to command queue for execution
+ this.#device.queue.submit([commandEncoder.finish()])
+
+ // Read results back to Javascript and then unmap buffer after reading
+ try {
+ await this.#cpuBuffer.mapAsync(GPUMapMode.READ)
+ await this.#device.queue.onSubmittedWorkDone()
+ } catch (err) {
+ console.warn(`Error getting data from GPU, retrying. ${err}`)
+ return await this.search(hash, threshold)
+ }
+ const data = new DataView(this.#cpuBuffer.getMappedRange())
+ const nonce = data.getBigUint64(0, true)
+ const found = !!data.getUint32(8)
+ this.#cpuBuffer.unmap()
+
+ if (found) {
+ const hex = nonce.toString(16).padStart(16, '0')
+ return hex
+ } else {
+ return await this.search(hash, threshold)
+ }
+ }
+}
+
+export default `
+ const NanoPowGpuComputeShader = \`${NanoPowGpuComputeShader}\`
+ const WorkerInterface = ${WorkerInterface}
+ const NanoPowGpu = ${NanoPowGpu}
+`
+++ /dev/null
-struct UBO {
- blockhash: array<vec4<u32>, 2>,
- random: u32,
- threshold: u32
-};
-@group(0) @binding(0) var<uniform> ubo: UBO;
-
-struct WORK {
- nonce: vec2<u32>,
- found: atomic<u32>
-};
-@group(0) @binding(1) var<storage, read_write> work: WORK;
-
-/**
-* Defined separately from uint v[32] below as the original value is required
-* to calculate the second uint32 of the digest for threshold comparison
-*/
-const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
-
-/**
-* These are offsets into the input data buffer for each mixing step.
-* They are multiplied by 2 from the original SIGMA values in
-* the C reference implementation, which refered to uint64s.
-*/
-const SIGMA82: array<u32, 192> = array<u32, 192>(
- 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
- 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
- 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
- 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
- 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
- 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
- 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
- 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
- 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
- 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
- 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
- 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
-);
-
-/**
-* 64-bit unsigned addition within the compression buffer
-* Sets v[i,i+1] += b
-* LSb is the Least-Significant (32) Bits of b
-* MSb is the Most-Significant (32) Bits of b
-* If LSb overflows, increment MSb operand
-*/
-fn add_uint64 (v: ptr<function, array<u32, 32>>, i: u32, LSb: u32, MSb: u32) {
- var o0: u32 = (*v)[i] + LSb;
- var o1: u32 = (*v)[i+1u] + MSb;
- if ((*v)[i] > 0xFFFFFFFFu - LSb) {
- o1 = o1 + 1u;
- }
- (*v)[i] = o0;
- (*v)[i+1u] = o1;
-}
-
-/**
-* G Mixing function
-*/
-fn G (v: ptr<function, array<u32, 32>>, m: ptr<function, array<u32, 16>>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) {
- add_uint64(v, a, (*v)[b], (*v)[b+1u]);
- add_uint64(v, a, (*m)[ix], (*m)[ix+1u]);
-
- // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
- var xor0: u32 = (*v)[d] ^ (*v)[a];
- var xor1: u32 = (*v)[d+1u] ^ (*v)[a+1u];
- (*v)[d] = xor1;
- (*v)[d+1u] = xor0;
-
- add_uint64(v, c, (*v)[d], (*v)[d+1u]);
-
- // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
- xor0 = (*v)[b] ^ (*v)[c];
- xor1 = (*v)[b+1u] ^ (*v)[c+1u];
- (*v)[b] = (xor0 >> 24u) ^ (xor1 << 8u);
- (*v)[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
-
- add_uint64(v, a, (*v)[b], (*v)[b+1u]);
- add_uint64(v, a, (*m)[iy], (*m)[iy+1u]);
-
- // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
- xor0 = (*v)[d] ^ (*v)[a];
- xor1 = (*v)[d+1u] ^ (*v)[a+1u];
- (*v)[d] = (xor0 >> 16u) ^ (xor1 << 16u);
- (*v)[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
-
- add_uint64(v, c, (*v)[d], (*v)[d+1u]);
-
- // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
- xor0 = (*v)[b] ^ (*v)[c];
- xor1 = (*v)[b+1u] ^ (*v)[c+1u];
- (*v)[b] = (xor1 >> 31u) ^ (xor0 << 1u);
- (*v)[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
-}
-
-/**
-* Main compute function
-* 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
-* UBO. High 4 bytes are the random value XOR'd with index of each thread.
-*/
-@compute @workgroup_size(${this.workload})
-fn main(
- @builtin(workgroup_id) workgroup_id: vec3<u32>,
- @builtin(local_invocation_id) local_id: vec3<u32>
-) {
- if (atomicLoad(&work.found) != 0u) { return; }
-
- let threshold: u32 = ubo.threshold;
-
- /**
- * Flatten 3D workgroup and local identifiers into u32 for each thread
- */
- var id: u32 = ((workgroup_id.x & 0xff) << 24) |
- ((workgroup_id.y & 0xff) << 16) |
- ((workgroup_id.z & 0xff) << 8) |
- (local_id.x & 0xff);
-
- /**
- * Initialize (nonce||blockhash) concatenation
- */
- var m: array<u32, 16>;
- m[0u] = ubo.random;
- m[1u] = ubo.random ^ id;
- m[2u] = ubo.blockhash[0u].x;
- m[3u] = ubo.blockhash[0u].y;
- m[4u] = ubo.blockhash[0u].z;
- m[5u] = ubo.blockhash[0u].w;
- m[6u] = ubo.blockhash[1u].x;
- m[7u] = ubo.blockhash[1u].y;
- m[8u] = ubo.blockhash[1u].z;
- m[9u] = ubo.blockhash[1u].w;
-
- /**
- * Compression buffer intialized to 2 instances of initialization vector
- * The following values have been modified from the BLAKE2B_IV:
- * OUTLEN is constant 8 bytes
- * v[0u] ^= 0x01010000u ^ uint(OUTLEN);
- * INLEN is constant 40 bytes: work value (8) + block hash (32)
- * v[24u] ^= uint(INLEN);
- * It is always the "last" compression at this INLEN
- * v[28u] = ~v[28u];
- * v[29u] = ~v[29u];
- */
- var v = array<u32, 32>(
- 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
- 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
- 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
- 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
- 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
- 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
- 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
- 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
- );
-
- /**
- * Twelve rounds of mixing as part of BLAKE2b compression step
- */
- for (var r: u32 = 0u; r < 12u; r = r + 1u) {
- G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[r * 16u + 0u], SIGMA82[r * 16u + 1u]);
- G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[r * 16u + 2u], SIGMA82[r * 16u + 3u]);
- G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[r * 16u + 4u], SIGMA82[r * 16u + 5u]);
- G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[r * 16u + 6u], SIGMA82[r * 16u + 7u]);
- G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[r * 16u + 8u], SIGMA82[r * 16u + 9u]);
- G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[r * 16u + 10u], SIGMA82[r * 16u + 11u]);
- G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[r * 16u + 12u], SIGMA82[r * 16u + 13u]);
- G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[r * 16u + 14u], SIGMA82[r * 16u + 15u]);
- }
-
- /**
- * Set nonce if it passes the threshold and no other thread has set it
- */
- if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold && atomicLoad(&work.found) == 0u) {
- atomicStore(&work.found, 1u);
- work.nonce.x = m[0];
- work.nonce.y = m[1];
- }
- return;
-}
// Based on nano-webgl-pow by Ben Green (numtel) <ben@latenightsketches.com>
// https://github.com/numtel/nano-webgl-pow
import { WorkerInterface } from '../pool.js'
+import { NanoPowGlFragmentShader, NanoPowGlVertexShader } from './nano-pow-shaders.js'
export class PowGl extends WorkerInterface {
static {
})
}
- // Vertex Shader
- static #vsSource = `#version 300 es
-#pragma vscode_glsllint_stage: vert
-precision highp float;
-layout (location=0) in vec4 position;
-layout (location=1) in vec2 uv;
-
-out vec2 uv_pos;
-
-void main() {
- uv_pos = uv;
- gl_Position = position;
-}`
-
- // Fragment shader
- static #fsSource = `#version 300 es
-#pragma vscode_glsllint_stage: frag
-precision highp float;
-precision highp int;
-
-in vec2 uv_pos;
-out vec4 fragColor;
-
-// blockhash - array of precalculated block hash components
-// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else
-// workload - Defines canvas size
-layout(std140) uniform UBO {
- uint blockhash[8];
- uint threshold;
- float workload;
-};
-
-// Random work values
-// First 2 bytes will be overwritten by texture pixel position
-// Second 2 bytes will be modified if the canvas size is greater than 256x256
-// Last 4 bytes remain as generated externally
-layout(std140) uniform WORK {
- uvec4 work[2];
-};
-
-// Defined separately from uint v[32] below as the original value is required
-// to calculate the second uint32 of the digest for threshold comparison
-const uint BLAKE2B_IV32_1 = 0x6A09E667u;
-
-// Both buffers represent 16 uint64s as 32 uint32s
-// because that's what GLSL offers, just like Javascript
-
-// Compression buffer, intialized to 2 instances of the initialization vector
-// The following values have been modified from the BLAKE2B_IV:
-// OUTLEN is constant 8 bytes
-// v[0] ^= 0x01010000u ^ uint(OUTLEN);
-// INLEN is constant 40 bytes: work value (8) + block hash (32)
-// v[24] ^= uint(INLEN);
-// It's always the "last" compression at this INLEN
-// v[28] = ~v[28];
-// v[29] = ~v[29];
-uint v[32] = uint[32](
- 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
- 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
- 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
- 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
- 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
- 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
- 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
- 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
-);
-// Input data buffer
-uint m[32];
-
-// These are offsets into the input data buffer for each mixing step.
-// They are multiplied by 2 from the original SIGMA values in
-// the C reference implementation, which refered to uint64s.
-const uint SIGMA82[192] = uint[192](
- 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
- 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
- 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
- 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
- 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
- 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
- 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
- 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
- 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
- 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
- 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
- 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
-);
-
-// G mixing function
-void G (uint ix, uint iy, uint a, uint b, uint c, uint d) {
- uint o0;
- uint o1;
- uint xor0;
- uint xor1;
-
- // a = a + b;
- o0 = v[a] + v[b];
- o1 = v[a+1u] + v[b+1u];
- if (v[a] > 0xFFFFFFFFu - v[b]) {
- o1 = o1 + 1u;
- }
- v[a] = o0;
- v[a+1u] = o1;
-
- // a = a + m[sigma[r][2*i+0]];
- o0 = v[a] + m[ix];
- o1 = v[a+1u] + m[ix+1u];
- if (v[a] > 0xFFFFFFFFu - m[ix]) {
- o1 = o1 + 1u;
- }
- v[a] = o0;
- v[a+1u] = o1;
-
- // d = rotr64(d ^ a, 32);
- xor0 = v[d] ^ v[a];
- xor1 = v[d+1u] ^ v[a+1u];
- v[d] = xor1;
- v[d+1u] = xor0;
-
- // c = c + d;
- o0 = v[c] + v[d];
- o1 = v[c+1u] + v[d+1u];
- if (v[c] > 0xFFFFFFFFu - v[d]) {
- o1 = o1 + 1u;
- }
- v[c] = o0;
- v[c+1u] = o1;
-
- // b = rotr64(b ^ c, 24);
- xor0 = v[b] ^ v[c];
- xor1 = v[b+1u] ^ v[c+1u];
- v[b] = (xor0 >> 24u) ^ (xor1 << 8u);
- v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
-
- // a = a + b;
- o0 = v[a] + v[b];
- o1 = v[a+1u] + v[b+1u];
- if (v[a] > 0xFFFFFFFFu - v[b]) {
- o1 = o1 + 1u;
- }
- v[a] = o0;
- v[a+1u] = o1;
-
- // a = a + m[sigma[r][2*i+1]];
- o0 = v[a] + m[iy];
- o1 = v[a+1u] + m[iy+1u];
- if (v[a] > 0xFFFFFFFFu - m[iy]) {
- o1 = o1 + 1u;
- }
- v[a] = o0;
- v[a+1u] = o1;
-
- // d = rotr64(d ^ a, 16)
- xor0 = v[d] ^ v[a];
- xor1 = v[d+1u] ^ v[a+1u];
- v[d] = (xor0 >> 16u) ^ (xor1 << 16u);
- v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
-
- // c = c + d;
- o0 = v[c] + v[d];
- o1 = v[c+1u] + v[d+1u];
- if (v[c] > 0xFFFFFFFFu - v[d]) {
- o1 = o1 + 1u;
- }
- v[c] = o0;
- v[c+1u] = o1;
-
- // b = rotr64(b ^ c, 63)
- xor0 = v[b] ^ v[c];
- xor1 = v[b+1u] ^ v[c+1u];
- v[b] = (xor1 >> 31u) ^ (xor0 << 1u);
- v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
-}
-
-void main() {
- int i;
- uvec4 u_work0 = work[0u];
- uvec4 u_work1 = work[1u];
- uint uv_x = uint(uv_pos.x * workload);
- uint uv_y = uint(uv_pos.y * workload);
- uint x_pos = uv_x % 256u;
- uint y_pos = uv_y % 256u;
- uint x_index = (uv_x - x_pos) / 256u;
- uint y_index = (uv_y - y_pos) / 256u;
-
- // First 2 work bytes are the x,y pos within the 256x256 area, the next
- // two bytes are modified from the random generated value, XOR'd with
- // the x,y area index of where this pixel is located
- m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u));
-
- // Remaining bytes are un-modified from the random generated value
- m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u));
-
- // Block hash
- for (uint i = 0u; i < 8u; i = i + 1u) {
- m[i+2u] = blockhash[i];
- }
-
- // twelve rounds of mixing
- for(uint i = 0u; i < 12u; i = i + 1u) {
- G(SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u], 0u, 8u, 16u, 24u);
- G(SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u], 2u, 10u, 18u, 26u);
- G(SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u], 4u, 12u, 20u, 28u);
- G(SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u], 6u, 14u, 22u, 30u);
- G(SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u], 0u, 10u, 20u, 30u);
- G(SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u], 2u, 12u, 22u, 24u);
- G(SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u], 4u, 14u, 16u, 26u);
- G(SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u], 6u, 8u, 18u, 28u);
- }
-
- // Pixel data is multipled by threshold test result (0 or 1)
- // First 4 bytes insignificant, only calculate digest of second 4 bytes
- if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > threshold) {
- fragColor = vec4(
- float(x_index + 1u)/255.0, // +1 to distinguish from 0 (unsuccessful) pixels
- float(y_index + 1u)/255.0, // Same as previous
- float(x_pos)/255.0, // Return the 2 custom bytes used in work value
- float(y_pos)/255.0 // Second custom byte
- );
- } else {
- discard;
- }
-}`
-
/** Used to set canvas size. Must be a multiple of 256. */
static #WORKLOAD: number = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency))
this.#vertexShader = this.#gl.createShader(this.#gl.VERTEX_SHADER)
if (this.#vertexShader == null) throw new Error('Failed to create vertex shader')
- this.#gl.shaderSource(this.#vertexShader, this.#vsSource)
+ this.#gl.shaderSource(this.#vertexShader, NanoPowGlVertexShader)
this.#gl.compileShader(this.#vertexShader)
if (!this.#gl.getShaderParameter(this.#vertexShader, this.#gl.COMPILE_STATUS))
throw new Error(this.#gl.getShaderInfoLog(this.#vertexShader) ?? `Failed to compile vertex shader`)
this.#fragmentShader = this.#gl.createShader(this.#gl.FRAGMENT_SHADER)
if (this.#fragmentShader == null) throw new Error('Failed to create fragment shader')
- this.#gl.shaderSource(this.#fragmentShader, this.#fsSource)
+ this.#gl.shaderSource(this.#fragmentShader, NanoPowGlFragmentShader)
this.#gl.compileShader(this.#fragmentShader)
if (!this.#gl.getShaderParameter(this.#fragmentShader, this.#gl.COMPILE_STATUS))
throw new Error(this.#gl.getShaderInfoLog(this.#fragmentShader) ?? `Failed to compile fragment shader`)
}
export default `
+ const NanoPowGlFragmentShader = ${NanoPowGlFragmentShader}
+ const NanoPowGlVertexShader = ${NanoPowGlVertexShader}
const WorkerInterface = ${WorkerInterface}
const PowGl = ${PowGl}
`
+++ /dev/null
-// SPDX-FileCopyrightText: 2024 Chris Duncan <chris@zoso.dev>
-// SPDX-License-Identifier: GPL-3.0-or-later
-// BLAKE2b hashing implementation derived from nano-webgl-pow by Ben Green <ben@latenightsketches.com> (https://github.com/numtel/nano-webgl-pow)
-/// <reference types="@webgpu/types" />
-import { WorkerInterface } from '../pool.js'
-
-/**
-* Nano proof-of-work using WebGPU.
-*/
-export class PowGpu extends WorkerInterface {
- static {
- PowGpu.listen()
- }
-
- /**
- * Calculates proof-of-work as described by the Nano cryptocurrency protocol.
- *
- * @param {any[]} data - Array of hashes and minimum thresholds
- * @returns Promise for proof-of-work attached to original array objects
- */
- static async work (data: any[]): Promise<any[]> {
- return new Promise(async (resolve, reject): Promise<void> => {
- for (const d of data) {
- try {
- d.work = await this.search(d.hash, d.threshold)
- } catch (err) {
- reject(err)
- }
- }
- resolve(data)
- })
- }
-
- // WebGPU Compute Shader
- static shader = `
- struct UBO {
- blockhash: array<vec4<u32>, 2>,
- random: u32,
- threshold: u32
- };
- @group(0) @binding(0) var<uniform> ubo: UBO;
-
- struct WORK {
- nonce: vec2<u32>,
- found: atomic<u32>
- };
- @group(0) @binding(1) var<storage, read_write> work: WORK;
-
- /**
- * Defined separately from uint v[32] below as the original value is required
- * to calculate the second uint32 of the digest for threshold comparison
- */
- const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
-
- /**
- * These are offsets into the input data buffer for each mixing step.
- * They are multiplied by 2 from the original SIGMA values in
- * the C reference implementation, which refered to uint64s.
- *
- * const SIGMA82: array<u32, 192> = array<u32, 192>(
- * 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
- * 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u,
- * 22u,16u,24u,0u,10u,4u,30u,26u,20u,28u,6u,12u,14u,2u,18u,8u,
- * 14u,18u,6u,2u,26u,24u,22u,28u,4u,12u,10u,20u,8u,0u,30u,16u,
- * 18u,0u,10u,14u,4u,8u,20u,30u,28u,2u,22u,24u,12u,16u,6u,26u,
- * 4u,24u,12u,20u,0u,22u,16u,6u,8u,26u,14u,10u,30u,28u,2u,18u,
- * 24u,10u,2u,30u,28u,26u,8u,20u,0u,14u,12u,6u,18u,4u,16u,22u,
- * 26u,22u,14u,28u,24u,2u,6u,18u,10u,0u,30u,8u,16u,12u,4u,20u,
- * 12u,30u,28u,18u,22u,6u,0u,16u,24u,4u,26u,14u,2u,8u,20u,10u,
- * 20u,4u,16u,8u,14u,12u,2u,10u,30u,22u,18u,28u,6u,24u,26u,0u,
- * 0u,2u,4u,6u,8u,10u,12u,14u,16u,18u,20u,22u,24u,26u,28u,30u,
- * 28u,20u,8u,16u,18u,30u,26u,12u,2u,24u,0u,4u,22u,14u,10u,6u
- * );
- */
-
- /**
- * G Mixing function
- */
- fn G (
- va0: ptr<function, u32>, va1: ptr<function, u32>,
- vb0: ptr<function, u32>, vb1: ptr<function, u32>,
- vc0: ptr<function, u32>, vc1: ptr<function, u32>,
- vd0: ptr<function, u32>, vd1: ptr<function, u32>,
- mx0: u32, mx1: u32, my0: u32, my1: u32
- ) {
- var o0: u32;
- var o1: u32;
- var xor0: u32;
- var xor1: u32;
-
- // a = a + b;
- o0 = *va0 + *vb0;
- o1 = *va1 + *vb1;
- if (*va0 > 0xFFFFFFFFu - *vb0) {
- o1 = o1 + 1u;
- }
- *va0 = o0;
- *va1 = o1;
-
- // a = a + m[sigma[r][2*i+0]];
- o0 = *va0 + mx0;
- o1 = *va1 + mx1;
- if (*va0 > 0xFFFFFFFFu - mx0) {
- o1 = o1 + 1u;
- }
- *va0 = o0;
- *va1 = o1;
-
- // d = rotr64(d ^ a, 32);
- xor0 = *vd0 ^ *va0;
- xor1 = *vd1 ^ *va1;
- *vd0 = xor1;
- *vd1 = xor0;
-
- // c = c + d;
- o0 = *vc0 + *vd0;
- o1 = *vc1 + *vd1;
- if (*vc0 > 0xFFFFFFFFu - *vd0) {
- o1 = o1 + 1u;
- }
- *vc0 = o0;
- *vc1 = o1;
-
- // b = rotr64(b ^ c, 24);
- xor0 = *vb0 ^ *vc0;
- xor1 = *vb1 ^ *vc1;
- *vb0 = (xor0 >> 24u) ^ (xor1 << 8u);
- *vb1 = (xor1 >> 24u) ^ (xor0 << 8u);
-
- // a = a + b;
- o0 = *va0 + *vb0;
- o1 = *va1 + *vb1;
- if (*va0 > 0xFFFFFFFFu - *vb0) {
- o1 = o1 + 1u;
- }
- *va0 = o0;
- *va1 = o1;
-
- // a = a + m[sigma[r][2*i+1]];
- o0 = *va0 + my0;
- o1 = *va1 + my1;
- if (*va0 > 0xFFFFFFFFu - my0) {
- o1 = o1 + 1u;
- }
- *va0 = o0;
- *va1 = o1;
-
- // d = rotr64(d ^ a, 16)
- xor0 = *vd0 ^ *va0;
- xor1 = *vd1 ^ *va1;
- *vd0 = (xor0 >> 16u) ^ (xor1 << 16u);
- *vd1 = (xor1 >> 16u) ^ (xor0 << 16u);
-
- // c = c + d;
- o0 = *vc0 + *vd0;
- o1 = *vc1 + *vd1;
- if (*vc0 > 0xFFFFFFFFu - *vd0) {
- o1 = o1 + 1u;
- }
- *vc0 = o0;
- *vc1 = o1;
-
- // b = rotr64(b ^ c, 63)
- xor0 = *vb0 ^ *vc0;
- xor1 = *vb1 ^ *vc1;
- *vb0 = (xor1 >> 31u) ^ (xor0 << 1u);
- *vb1 = (xor0 >> 31u) ^ (xor1 << 1u);
- }
-
- /**
- * Main compute function
- * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
- * UBO. High 4 bytes are the random value XOR'd with index of each thread.
- */
- @compute @workgroup_size(64)
- fn main(
- @builtin(workgroup_id) workgroup_id: vec3<u32>,
- @builtin(local_invocation_id) local_id: vec3<u32>
- ) {
- if (atomicLoad(&work.found) != 0u) { return; }
-
- let threshold: u32 = ubo.threshold;
-
- /**
- * Flatten 3D workgroup and local identifiers into u32 for each thread
- */
- var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
- ((workgroup_id.y & 0xFFu) << 16u) |
- ((workgroup_id.z & 0xFFu) << 8u) |
- (local_id.x & 0xFFu);
-
- /**
- * Initialize (nonce||blockhash) concatenation
- */
- var m0: u32 = ubo.random;
- var m1: u32 = ubo.random ^ id;
- var m2: u32 = ubo.blockhash[0u].x;
- var m3: u32 = ubo.blockhash[0u].y;
- var m4: u32 = ubo.blockhash[0u].z;
- var m5: u32 = ubo.blockhash[0u].w;
- var m6: u32 = ubo.blockhash[1u].x;
- var m7: u32 = ubo.blockhash[1u].y;
- var m8: u32 = ubo.blockhash[1u].z;
- var m9: u32 = ubo.blockhash[1u].w;
-
- /**
- * Compression buffer intialized to 2 instances of initialization vector
- * The following values have been modified from the BLAKE2B_IV:
- * OUTLEN is constant 8 bytes
- * v[0u] ^= 0x01010000u ^ uint(OUTLEN);
- * INLEN is constant 40 bytes: work value (8) + block hash (32)
- * v[24u] ^= uint(INLEN);
- * It is always the "last" compression at this INLEN
- * v[28u] = ~v[28u];
- * v[29u] = ~v[29u];
- */
- var v0: u32 = 0xF2BDC900u;
- var v1: u32 = 0x6A09E667u;
- var v2: u32 = 0x84CAA73Bu;
- var v3: u32 = 0xBB67AE85u;
- var v4: u32 = 0xFE94F82Bu;
- var v5: u32 = 0x3C6EF372u;
- var v6: u32 = 0x5F1D36F1u;
- var v7: u32 = 0xA54FF53Au;
- var v8: u32 = 0xADE682D1u;
- var v9: u32 = 0x510E527Fu;
- var v10: u32 = 0x2B3E6C1Fu;
- var v11: u32 = 0x9B05688Cu;
- var v12: u32 = 0xFB41BD6Bu;
- var v13: u32 = 0x1F83D9ABu;
- var v14: u32 = 0x137E2179u;
- var v15: u32 = 0x5BE0CD19u;
- var v16: u32 = 0xF3BCC908u;
- var v17: u32 = 0x6A09E667u;
- var v18: u32 = 0x84CAA73Bu;
- var v19: u32 = 0xBB67AE85u;
- var v20: u32 = 0xFE94F82Bu;
- var v21: u32 = 0x3C6EF372u;
- var v22: u32 = 0x5F1D36F1u;
- var v23: u32 = 0xA54FF53Au;
- var v24: u32 = 0xADE682F9u;
- var v25: u32 = 0x510E527Fu;
- var v26: u32 = 0x2B3E6C1Fu;
- var v27: u32 = 0x9B05688Cu;
- var v28: u32 = 0x04BE4294u;
- var v29: u32 = 0xE07C2654u;
- var v30: u32 = 0x137E2179u;
- var v31: u32 = 0x5BE0CD19u;
-
- /**
- * Twelve rounds of mixing as part of BLAKE2b compression step
- */
- // ROUND(0)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(1)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
-
- // ROUND(2)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m0, m1);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m4, m5);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m6, m7, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m2, m3);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m8, m9);
-
- // ROUND(3)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m6, m7, m2, m3);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m4, m5, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m8, m9, m0, m1);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(4)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m0, m1);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m4, m5, m8, m9);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m2, m3);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m6, m7, 0u, 0u);
-
- // ROUND(5)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m4, m5, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m0, m1, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, m6, m7);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m8, m9, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m2, m3, 0u, 0u);
-
- // ROUND(6)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m2, m3, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m8, m9, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m0, m1, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m6, m7);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, m4, m5);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(7)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m2, m3);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m6, m7, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m0, m1);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, m8, m9);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, m4, m5, 0u, 0u);
-
- // ROUND(8)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, m6, m7);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m0, m1, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, m4, m5);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m2, m3, m8, m9);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(9)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, m4, m5);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, 0u, 0u, m8, m9);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, m2, m3, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, m6, m7, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m0, m1);
-
- // ROUND(10)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, m0, m1, m2, m3);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m4, m5, m6, m7);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, m8, m9, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, 0u, 0u);
-
- // ROUND(11)
- G(&v0, &v1, &v8, &v9, &v16, &v17, &v24, &v25, 0u, 0u, 0u, 0u);
- G(&v2, &v3, &v10, &v11, &v18, &v19, &v26, &v27, m8, m9, 0u, 0u);
- G(&v4, &v5, &v12, &v13, &v20, &v21, &v28, &v29, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v14, &v15, &v22, &v23, &v30, &v31, 0u, 0u, 0u, 0u);
- G(&v0, &v1, &v10, &v11, &v20, &v21, &v30, &v31, m2, m3, 0u, 0u);
- G(&v2, &v3, &v12, &v13, &v22, &v23, &v24, &v25, m0, m1, m4, m5);
- G(&v4, &v5, &v14, &v15, &v16, &v17, &v26, &v27, 0u, 0u, 0u, 0u);
- G(&v6, &v7, &v8, &v9, &v18, &v19, &v28, &v29, 0u, 0u, m6, m7);
-
- /**
- * Set nonce if it passes the threshold and no other thread has set it
- */
- if ((BLAKE2B_IV32_1 ^ v1 ^ v17) > threshold && atomicLoad(&work.found) == 0u) {
- atomicStore(&work.found, 1u);
- work.nonce.x = m0;
- work.nonce.y = m1;
- }
- return;
- }
- `;
-
- // Initialize WebGPU
- static #device: GPUDevice | null = null
- static #uboBuffer: GPUBuffer
- static #gpuBuffer: GPUBuffer
- static #cpuBuffer: GPUBuffer
- static #bindGroupLayout: GPUBindGroupLayout
- static #pipeline: GPUComputePipeline
-
- static {
- this.init()
- }
-
- // Initialize WebGPU
- static init () {
- // Request device and adapter
- if (navigator.gpu == null) {
- throw new Error('WebGPU is not supported in this browser.')
- }
- navigator.gpu.requestAdapter()
- .then(adapter => {
- if (adapter == null) {
- throw new Error('WebGPU adapter refused by browser.')
- }
- adapter.requestDevice()
- .then(device => {
- this.#device = device
- this.#device.lost.then(loss => {
- console.warn(`(${loss.reason}) ${loss.message}. Reinitializing device.`)
- this.init()
- })
-
- // Create buffers for writing GPU calculations and reading from Javascript
- this.#uboBuffer = this.#device.createBuffer({
- size: 48,
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
- })
- this.#gpuBuffer = this.#device.createBuffer({
- size: 16,
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
- })
- this.#cpuBuffer = this.#device.createBuffer({
- size: 16,
- usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
- })
-
- // Create binding group data structure and use it later once UBO is known
- this.#bindGroupLayout = this.#device.createBindGroupLayout({
- entries: [
- {
- binding: 0,
- visibility: GPUShaderStage.COMPUTE,
- buffer: {
- type: 'uniform'
- },
- },
- {
- binding: 1,
- visibility: GPUShaderStage.COMPUTE,
- buffer: {
- type: 'storage'
- },
- }
- ]
- })
-
- // Create pipeline to connect compute shader to binding layout
- this.#pipeline = this.#device.createComputePipeline({
- layout: this.#device.createPipelineLayout({
- bindGroupLayouts: [this.#bindGroupLayout]
- }),
- compute: {
- entryPoint: 'main',
- module: this.#device.createShaderModule({
- code: this.shader
- })
- }
- })
- })
- })
- .catch(err => { throw new Error(err.message) })
- }
-
- /**
- * Finds a nonce that satisfies the Nano proof-of-work requirements.
- *
- * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
- * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
- */
- static async search (hash: string, threshold: number = 0xfffffff8): Promise<string> {
- if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new TypeError(`Invalid hash ${hash}`)
- if (typeof threshold !== 'number') throw new TypeError(`Invalid threshold ${threshold}`)
-
- // Ensure WebGPU is initialized before calculating, up to a max time frame
- while (this.#device == null && performance.now() < 8000) {
- await new Promise(resolve => {
- setTimeout(resolve, 500)
- })
- }
- if (this.#device == null) throw new Error(`WebGPU device failed to load.`)
-
- // Set up uniform buffer object
- // Note: u32 size is 4, but total alignment must be multiple of 16
- const uboView = new DataView(new ArrayBuffer(48))
- for (let i = 0; i < 64; i += 8) {
- const uint32 = hash.slice(i, i + 8)
- uboView.setUint32(i / 2, parseInt(uint32, 16))
- }
- const random = Math.floor((Math.random() * 0xffffffff))
- uboView.setUint32(32, random, true)
- uboView.setUint32(36, threshold, true)
- this.#device.queue.writeBuffer(this.#uboBuffer, 0, uboView)
-
- // Reset `found` flag to 0u in WORK before each calculation
- this.#device.queue.writeBuffer(this.#gpuBuffer, 8, new Uint32Array([0]))
-
- // Bind UBO read and GPU write buffers
- const bindGroup = this.#device.createBindGroup({
- layout: this.#bindGroupLayout,
- entries: [
- {
- binding: 0,
- resource: {
- buffer: this.#uboBuffer
- },
- },
- {
- binding: 1,
- resource: {
- buffer: this.#gpuBuffer
- },
- },
- ],
- })
-
- // Create command encoder to issue commands to GPU and initiate computation
- const commandEncoder = this.#device.createCommandEncoder()
- const passEncoder = commandEncoder.beginComputePass()
-
- // Issue commands and end compute pass structure
- passEncoder.setPipeline(this.#pipeline)
- passEncoder.setBindGroup(0, bindGroup)
- passEncoder.dispatchWorkgroups(256, 256, 256)
- passEncoder.end()
-
- // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
- commandEncoder.copyBufferToBuffer(
- this.#gpuBuffer,
- 0,
- this.#cpuBuffer,
- 0,
- 12
- )
-
- // End computation by passing array of command buffers to command queue for execution
- this.#device.queue.submit([commandEncoder.finish()])
-
- // Read results back to Javascript and then unmap buffer after reading
- try {
- await this.#cpuBuffer.mapAsync(GPUMapMode.READ)
- await this.#device.queue.onSubmittedWorkDone()
- } catch (err) {
- console.warn(`Error getting data from GPU, retrying. ${err}`)
- return await this.search(hash, threshold)
- }
- const data = new DataView(this.#cpuBuffer.getMappedRange())
- const nonce = data.getBigUint64(0, true)
- const found = !!data.getUint32(8)
- this.#cpuBuffer.unmap()
-
- if (found) {
- const hex = nonce.toString(16).padStart(16, '0')
- return hex
- } else {
- return await this.search(hash, threshold)
- }
- }
-}
-
-export default `
- const WorkerInterface = ${WorkerInterface}
- const PowGpu = ${PowGpu}
-`
import { Blake2b } from './lib/blake2b.js'
import { SendBlock, ReceiveBlock, ChangeBlock } from './lib/block.js'
import { PowGl } from './lib/workers/powgl.js'
-import { PowGpu } from './lib/workers/powgpu.js'
+import { NanoPowGpu } from './lib/workers/nano-pow.js'
import { Rpc } from './lib/rpc.js'
import { Rolodex } from './lib/rolodex.js'
import { Safe } from './lib/safe.js'
import { Tools } from './lib/tools.js'
import { Bip44Wallet, Blake2bWallet, LedgerWallet } from './lib/wallet.js'
-export { Account, Blake2b, SendBlock, ReceiveBlock, ChangeBlock, PowGl, PowGpu, Rpc, Rolodex, Safe, Tools, Bip44Wallet, Blake2bWallet, LedgerWallet }
+export { Account, Blake2b, SendBlock, ReceiveBlock, ChangeBlock, PowGl, NanoPowGpu as PowGpu, Rpc, Rolodex, Safe, Tools, Bip44Wallet, Blake2bWallet, LedgerWallet }