From 3eb8fb49a58d0e31c3a95273c80bb425b7a3ceec Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Mon, 30 Dec 2024 17:53:55 -0800 Subject: [PATCH] Initial WebGPU conversion. --- perf/block.perf.js | 2 +- src/lib/block.ts | 2 +- src/lib/workers.ts | 3 +- src/lib/workers/powgpu.ts | 643 +++++++++++++++++++------------------- 4 files changed, 319 insertions(+), 331 deletions(-) diff --git a/perf/block.perf.js b/perf/block.perf.js index 0fa1337..79658c5 100644 --- a/perf/block.perf.js +++ b/perf/block.perf.js @@ -9,7 +9,7 @@ import { SendBlock } from '#dist/main.js' import 'nano-webgl-pow' await suite('Block performance', async () => { - const COUNT = 0x10 + const COUNT = 0x1 await test(`Customized PoW: Time to calculate proof-of-work for a send block ${COUNT} times`, async () => { const times = [] diff --git a/src/lib/block.ts b/src/lib/block.ts index da23d56..83b4cdd 100644 --- a/src/lib/block.ts +++ b/src/lib/block.ts @@ -8,7 +8,7 @@ import { dec, hex } from './convert.js' import { NanoNaCl } from './workers/nano-nacl.js' import { Pool } from './pool.js' import { Rpc } from './rpc.js' -import { Pow } from './workers.js' +import { Pow, PowGpu } from './workers.js' /** * Represents a block as defined by the Nano cryptocurrency protocol. The Block diff --git a/src/lib/workers.ts b/src/lib/workers.ts index 017025f..b67290d 100644 --- a/src/lib/workers.ts +++ b/src/lib/workers.ts @@ -3,5 +3,6 @@ import { default as Bip44Ckd } from './workers/bip44-ckd.js' import { default as NanoNaCl } from './workers/nano-nacl.js' import { default as Pow } from './workers/powgl.js' +import { default as PowGpu } from './workers/powgpu.js' -export { Bip44Ckd, NanoNaCl, Pow } +export { Bip44Ckd, NanoNaCl, Pow, PowGpu } diff --git a/src/lib/workers/powgpu.ts b/src/lib/workers/powgpu.ts index 5f7733a..a52cd59 100644 --- a/src/lib/workers/powgpu.ts +++ b/src/lib/workers/powgpu.ts @@ -2,12 +2,14 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Based on nano-webgl-pow by Ben Green (numtel) // https://github.com/numtel/nano-webgl-pow +/// import { WorkerInterface } from '../pool.js' export class Pow extends WorkerInterface { static { Pow.listen() } + /** * Calculates proof-of-work as described by the Nano cryptocurrency protocol. * @@ -34,197 +36,11 @@ export class Pow extends WorkerInterface { * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation */ static async find (hash: string, threshold: number = 0xfffffff8): Promise { - return new Promise(resolve => { - this.#calculate(hash, resolve, threshold) + return new Promise(async (resolve) => { + await this.#calculate(hash, resolve, threshold) }) } - // Vertex Shader - static #vsSource = `#version 300 es -#pragma vscode_glsllint_stage: vert -precision highp float; -layout (location=0) in vec4 position; -layout (location=1) in vec2 uv; - -out vec2 uv_pos; - -void main() { - uv_pos = uv; - gl_Position = position; -}` - - // Fragment shader - static #fsSource = `#version 300 es -#pragma vscode_glsllint_stage: frag -precision highp float; -precision highp int; - -in vec2 uv_pos; -out vec4 fragColor; - -// blockhash - array of precalculated block hash components -// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else -// workload - Defines canvas size -layout(std140) uniform UBO { - uint blockhash[8]; - uint threshold; - float workload; -}; - -// Random work values -// First 2 bytes will be overwritten by texture pixel position -// Second 2 bytes will be modified if the canvas size is greater than 256x256 -// Last 4 bytes remain as generated externally -layout(std140) uniform WORK { - uvec4 work[2]; -}; - -// Defined separately from uint v[32] below as the original value is required -// to calculate the second uint32 of the digest for threshold comparison -#define BLAKE2B_IV32_1 0x6A09E667u - -// Both buffers represent 16 uint64s as 32 uint32s -// because that's what GLSL offers, just like Javascript - -// Compression buffer, intialized to 2 instances of the initialization vector -// The following values have been modified from the BLAKE2B_IV: -// OUTLEN is constant 8 bytes -// v[0] ^= 0x01010000u ^ uint(OUTLEN); -// INLEN is constant 40 bytes: work value (8) + block hash (32) -// v[24] ^= uint(INLEN); -// It's always the "last" compression at this INLEN -// v[28] = ~v[28]; -// v[29] = ~v[29]; -uint v[32] = uint[32]( - 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, - 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, - 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, - 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u, - 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, - 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, - 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, - 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u -); -// Input data buffer -uint m[32]; - -// These are offsets into the input data buffer for each mixing step. -// They are multiplied by 2 from the original SIGMA values in -// the C reference implementation, which refered to uint64s. -const int SIGMA82[192] = int[192]( - 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30, - 28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6, - 22,16,24,0,10,4,30,26,20,28,6,12,14,2,18,8, - 14,18,6,2,26,24,22,28,4,12,10,20,8,0,30,16, - 18,0,10,14,4,8,20,30,28,2,22,24,12,16,6,26, - 4,24,12,20,0,22,16,6,8,26,14,10,30,28,2,18, - 24,10,2,30,28,26,8,20,0,14,12,6,18,4,16,22, - 26,22,14,28,24,2,6,18,10,0,30,8,16,12,4,20, - 12,30,28,18,22,6,0,16,24,4,26,14,2,8,20,10, - 20,4,16,8,14,12,2,10,30,22,18,28,6,24,26,0, - 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30, - 28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6 -); - -// 64-bit unsigned addition within the compression buffer -// Sets v[a,a+1] += b -// b0 is the low 32 bits of b, b1 represents the high 32 bits -void add_uint64 (int a, uint b0, uint b1) { - uint o0 = v[a] + b0; - uint o1 = v[a + 1] + b1; - if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow? - o1++; - } - v[a] = o0; - v[a + 1] = o1; -} - -// G Mixing function -void B2B_G (int a, int b, int c, int d, int ix, int iy) { - add_uint64(a, v[b], v[b+1]); - add_uint64(a, m[ix], m[ix + 1]); - - // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits - uint xor0 = v[d] ^ v[a]; - uint xor1 = v[d + 1] ^ v[a + 1]; - v[d] = xor1; - v[d + 1] = xor0; - - add_uint64(c, v[d], v[d+1]); - - // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits - xor0 = v[b] ^ v[c]; - xor1 = v[b + 1] ^ v[c + 1]; - v[b] = (xor0 >> 24) ^ (xor1 << 8); - v[b + 1] = (xor1 >> 24) ^ (xor0 << 8); - - add_uint64(a, v[b], v[b+1]); - add_uint64(a, m[iy], m[iy + 1]); - - // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits - xor0 = v[d] ^ v[a]; - xor1 = v[d + 1] ^ v[a + 1]; - v[d] = (xor0 >> 16) ^ (xor1 << 16); - v[d + 1] = (xor1 >> 16) ^ (xor0 << 16); - - add_uint64(c, v[d], v[d+1]); - - // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits - xor0 = v[b] ^ v[c]; - xor1 = v[b + 1] ^ v[c + 1]; - v[b] = (xor1 >> 31) ^ (xor0 << 1); - v[b + 1] = (xor0 >> 31) ^ (xor1 << 1); -} - -void main() { - int i; - uvec4 u_work0 = work[0]; - uvec4 u_work1 = work[1]; - uint uv_x = uint(uv_pos.x * workload); - uint uv_y = uint(uv_pos.y * workload); - uint x_pos = uv_x % 256u; - uint y_pos = uv_y % 256u; - uint x_index = (uv_x - x_pos) / 256u; - uint y_index = (uv_y - y_pos) / 256u; - - // First 2 work bytes are the x,y pos within the 256x256 area, the next - // two bytes are modified from the random generated value, XOR'd with - // the x,y area index of where this pixel is located - m[0] = (x_pos ^ (y_pos << 8) ^ ((u_work0.b ^ x_index) << 16) ^ ((u_work0.a ^ y_index) << 24)); - - // Remaining bytes are un-modified from the random generated value - m[1] = (u_work1.r ^ (u_work1.g << 8) ^ (u_work1.b << 16) ^ (u_work1.a << 24)); - - // Block hash - for (i=0;i<8;i++) { - m[i+2] = blockhash[i]; - } - - // twelve rounds of mixing - for(i=0;i<12;i++) { - B2B_G(0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]); - B2B_G(2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]); - B2B_G(4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]); - B2B_G(6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]); - B2B_G(0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]); - B2B_G(2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]); - B2B_G(4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]); - B2B_G(6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]); - } - - // Pixel data is multipled by threshold test result (0 or 1) - // First 4 bytes insignificant, only calculate digest of second 4 bytes - if ((BLAKE2B_IV32_1 ^ v[1] ^ v[17]) > threshold) { - fragColor = vec4( - float(x_index + 1u)/255., // +1 to distinguish from 0 (unsuccessful) pixels - float(y_index + 1u)/255., // Same as previous - float(x_pos)/255., // Return the 2 custom bytes used in work value - float(y_pos)/255. // Second custom byte - ); - } else { - discard; - } -}` /** Used to set canvas size. Must be a multiple of 256. */ static #WORKLOAD: number = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency)) @@ -237,95 +53,246 @@ void main() { return out } - static #gl: WebGL2RenderingContext | null - static #program: WebGLProgram | null - static #vertexShader: WebGLShader | null - static #fragmentShader: WebGLShader | null - static #positionBuffer: WebGLBuffer | null - static #uvBuffer: WebGLBuffer | null - static #uboBuffer: WebGLBuffer | null - static #workBuffer: WebGLBuffer | null - static #query: WebGLQuery | null - static #pixels: Uint8Array - // Vertex Positions, 2 triangles - static #positions = new Float32Array([ - -1, -1, 0, -1, 1, 0, 1, 1, 0, - 1, -1, 0, 1, 1, 0, -1, -1, 0 - ]) - // Texture Positions - static #uvPosArray = new Float32Array([ - 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1 - ]) - - // Compile - static { - this.#gl = new OffscreenCanvas(this.#WORKLOAD, this.#WORKLOAD).getContext('webgl2') - if (this.#gl == null) throw new Error('WebGL 2 is required') - this.#gl.clearColor(0, 0, 0, 1) - - this.#program = this.#gl.createProgram() - if (this.#program == null) throw new Error('Failed to create shader program') - - this.#vertexShader = this.#gl.createShader(this.#gl.VERTEX_SHADER) - if (this.#vertexShader == null) throw new Error('Failed to create vertex shader') - this.#gl.shaderSource(this.#vertexShader, this.#vsSource) - this.#gl.compileShader(this.#vertexShader) - if (!this.#gl.getShaderParameter(this.#vertexShader, this.#gl.COMPILE_STATUS)) - throw new Error(this.#gl.getShaderInfoLog(this.#vertexShader) ?? `Failed to compile vertex shader`) - - this.#fragmentShader = this.#gl.createShader(this.#gl.FRAGMENT_SHADER) - if (this.#fragmentShader == null) throw new Error('Failed to create fragment shader') - this.#gl.shaderSource(this.#fragmentShader, this.#fsSource) - this.#gl.compileShader(this.#fragmentShader) - if (!this.#gl.getShaderParameter(this.#fragmentShader, this.#gl.COMPILE_STATUS)) - throw new Error(this.#gl.getShaderInfoLog(this.#fragmentShader) ?? `Failed to compile fragment shader`) - - this.#gl.attachShader(this.#program, this.#vertexShader) - this.#gl.attachShader(this.#program, this.#fragmentShader) - this.#gl.linkProgram(this.#program) - if (!this.#gl.getProgramParameter(this.#program, this.#gl.LINK_STATUS)) - throw new Error(this.#gl.getProgramInfoLog(this.#program) ?? `Failed to link program`) - - // Construct simple 2D geometry - this.#gl.useProgram(this.#program) - const triangleArray = this.#gl.createVertexArray() - this.#gl.bindVertexArray(triangleArray) - - this.#positionBuffer = this.#gl.createBuffer() - this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, this.#positionBuffer) - this.#gl.bufferData(this.#gl.ARRAY_BUFFER, this.#positions, this.#gl.STATIC_DRAW) - this.#gl.vertexAttribPointer(0, 3, this.#gl.FLOAT, false, 0, 0) - this.#gl.enableVertexAttribArray(0) - - this.#uvBuffer = this.#gl.createBuffer() - this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, this.#uvBuffer) - this.#gl.bufferData(this.#gl.ARRAY_BUFFER, this.#uvPosArray, this.#gl.STATIC_DRAW) - this.#gl.vertexAttribPointer(1, 2, this.#gl.FLOAT, false, 0, 0) - this.#gl.enableVertexAttribArray(1) - - this.#uboBuffer = this.#gl.createBuffer() - this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#uboBuffer) - this.#gl.bufferData(this.#gl.UNIFORM_BUFFER, 144, this.#gl.DYNAMIC_DRAW) - this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null) - this.#gl.bindBufferBase(this.#gl.UNIFORM_BUFFER, 0, this.#uboBuffer) - this.#gl.uniformBlockBinding(this.#program, this.#gl.getUniformBlockIndex(this.#program, 'UBO'), 0) - - this.#workBuffer = this.#gl.createBuffer() - this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#workBuffer) - this.#gl.bufferData(this.#gl.UNIFORM_BUFFER, 32, this.#gl.STREAM_DRAW) - this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null) - this.#gl.bindBufferBase(this.#gl.UNIFORM_BUFFER, 1, this.#workBuffer) - this.#gl.uniformBlockBinding(this.#program, this.#gl.getUniformBlockIndex(this.#program, 'WORK'), 1) - - this.#pixels = new Uint8Array(this.#gl.drawingBufferWidth * this.#gl.drawingBufferHeight * 4) - this.#query = this.#gl.createQuery() + static #device: GPUDevice | null = null; + static #pipeline: GPUComputePipeline | null = null; + static #workgroupSize: number = 256; // Must align with shader + static #bindGroupLayout: GPUBindGroupLayout | null = null; + + + // WebGPU Compute Shader + static #computeShader = ` + @group(0) @binding(0) var ubo: UBO; + @group(0) @binding(1) var work: array, 2>; + + struct UBO { + blockhash: array, 2>, + threshold: u32, + workload: f32, + }; + + /** + * Defined separately from uint v[32] below as the original value is required + * to calculate the second uint32 of the digest for threshold comparison + */ + const BLAKE2B_IV32_1: u32 = 0x6A09E667u; + + /** + * These are offsets into the input data buffer for each mixing step. + * They are multiplied by 2 from the original SIGMA values in + * the C reference implementation, which refered to uint64s. + */ + const SIGMA82: array = array( + 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30, + 28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6, + 22,16,24,0,10,4,30,26,20,28,6,12,14,2,18,8, + 14,18,6,2,26,24,22,28,4,12,10,20,8,0,30,16, + 18,0,10,14,4,8,20,30,28,2,22,24,12,16,6,26, + 4,24,12,20,0,22,16,6,8,26,14,10,30,28,2,18, + 24,10,2,30,28,26,8,20,0,14,12,6,18,4,16,22, + 26,22,14,28,24,2,6,18,10,0,30,8,16,12,4,20, + 12,30,28,18,22,6,0,16,24,4,26,14,2,8,20,10, + 20,4,16,8,14,12,2,10,30,22,18,28,6,24,26,0, + 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30, + 28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6 + ); + + /** + * 64-bit unsigned addition within the compression buffer + * Sets v[a,a+1] += b + * b0 is the low 32 bits of b, b1 represents the high 32 bits + */ + fn add_uint64 (vv: ptr>, a: u32, b0: u32, b1: u32) { + var v: array = (*vv); + var o0: u32 = v[a] + b0; + var o1: u32 = v[a+1u] + b1; + if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow? + o1 = o1 + 1u; + } + v[a] = o0; + v[a+1u] = o1; + } + + /** + * G Mixing function + */ + fn B2B_G (ptr_v: ptr>, ptr_m: ptr>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) { + var v: array = (*ptr_v); + var m: array = (*ptr_m); + add_uint64(ptr_v, a, v[b], v[b+1u]); + add_uint64(ptr_v, a, m[ix], m[ix+1u]); + + // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits + var xor0: u32 = v[d] ^ v[a]; + var xor1: u32 = v[d+1u] ^ v[a+1u]; + v[d] = xor1; + v[d+1u] = xor0; + + add_uint64(ptr_v, c, v[d], v[d+1u]); + + // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits + xor0 = v[b] ^ v[c]; + xor1 = v[b+1u] ^ v[c+1u]; + v[b] = (xor0 >> 24u) ^ (xor1 << 8u); + v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u); + + add_uint64(ptr_v, a, v[b], v[b+1u]); + add_uint64(ptr_v, a, m[iy], m[iy+1u]); + + // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits + xor0 = v[d] ^ v[a]; + xor1 = v[d+1u] ^ v[a+1u]; + v[d] = (xor0 >> 16u) ^ (xor1 << 16u); + v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u); + + add_uint64(ptr_v, c, v[d], v[d+1u]); + + // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits + xor0 = v[b] ^ v[c]; + xor1 = v[b+1u] ^ v[c+1u]; + v[b] = (xor1 >> 31u) ^ (xor0 << 1u); + v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u); + } + + /** + * Main compute function + */ + @compute @workgroup_size(${this.#workgroupSize}) + fn main(@builtin(global_invocation_id) global_id : vec3) { + let uv_x = global_id.x; + let uv_y = global_id.y; + + // Check bounds, may be unnecessary with proper dispatch size + if (uv_x >= u32(ubo.workload) || uv_y >= u32(ubo.workload) ) { + return; + } + + var m: array; + var u_work0: vec4 = work[0u]; + var u_work1: vec4 = work[1u]; + let x_pos = uv_x % 256u; + let y_pos = uv_y % 256u; + let x_index = (uv_x - x_pos) / 256u; + let y_index = (uv_y - y_pos) / 256u; + + // First 2 work bytes are the x,y pos within the 256x256 area, the next + // two bytes are modified from the random generated value, XOR'd with + // the x,y area index of where this pixel is located + m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u)); + + // Remaining bytes are un-modified from the random generated value + m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u)); + + // Block hash + m[2u] = ubo.blockhash[0u].x; + m[3u] = ubo.blockhash[0u].y; + m[4u] = ubo.blockhash[0u].z; + m[5u] = ubo.blockhash[0u].w; + m[6u] = ubo.blockhash[1u].x; + m[7u] = ubo.blockhash[1u].y; + m[8u] = ubo.blockhash[1u].z; + m[9u] = ubo.blockhash[1u].w; + + /** + * Compression buffer, intialized to 2 instances of the initialization vector + * The following values have been modified from the BLAKE2B_IV: + * OUTLEN is constant 8 bytes + * v[0] ^= 0x01010000u ^ uint(OUTLEN); + * INLEN is constant 40 bytes: work value (8) + block hash (32) + * v[24] ^= uint(INLEN); + * It's always the "last" compression at this INLEN + * v[28] = ~v[28]; + * v[29] = ~v[29]; + */ + var v = array( + 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, + 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, + 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, + 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u, + 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u, + 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au, + 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu, + 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u + ); + + // twelve rounds of mixing + for (var i: u32 = 0u; i < 12u; i = i + 1u) { + B2B_G(&v, &m, 0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]); + B2B_G(&v, &m, 2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]); + B2B_G(&v, &m, 4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]); + B2B_G(&v, &m, 6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]); + B2B_G(&v, &m, 0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]); + B2B_G(&v, &m, 2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]); + B2B_G(&v, &m, 4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]); + B2B_G(&v, &m, 6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]); + } + + // Store the result directly into work array + if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) { + work[0u].x = x_index + 1u; + work[0u].y = y_index + 1u; + work[0u].z = x_pos; + work[0u].w = y_pos; + } + } + `; + + // Initialize WebGPU + static async #initializeWebGPU (): Promise { + if (!navigator.gpu) { + throw new Error("WebGPU is not supported.") + } + + const adapter = await navigator.gpu.requestAdapter() + if (!adapter) { + throw new Error("No suitable WebGPU adapter found.") + } + + this.#device = await adapter.requestDevice() + this.#bindGroupLayout = this.#device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'uniform', }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage', }, + }, + ], + }) + + this.#pipeline = this.#device.createComputePipeline({ + layout: this.#device.createPipelineLayout({ + bindGroupLayouts: [this.#bindGroupLayout], + }), + compute: { + module: this.#device.createShaderModule({ + code: this.#computeShader, + }), + entryPoint: 'main', + }, + }) } - static #calculate (hashHex: string, callback: (nonce: string | PromiseLike) => any, threshold: number): void { - if (Pow.#gl == null) throw new Error('WebGL 2 is required') + + static async #calculate (hashHex: string, callback: (nonce: string | PromiseLike) => any, threshold: number): Promise { + if (!/^[A-F-a-f0-9]{64}$/.test(hashHex)) throw new Error(`invalid_hash ${hashHex}`) if (typeof threshold !== 'number') throw new TypeError(`Invalid threshold ${threshold}`) - if (this.#gl == null) throw new Error('WebGL 2 is required') + + // Ensure WebGPU is initialized. Call this once + if (!this.#device) { + this.#initializeWebGPU() + .then(() => { + this.#calculate(hashHex, callback, threshold) // restart calculation + }) + .catch((error) => { + console.error("Failed to initalize WebGPU:", error) + }) + return // Stop execution until WebGPU is initalized + } // Set up uniform buffer object const uboView = new DataView(new ArrayBuffer(144)) @@ -335,66 +302,86 @@ void main() { } uboView.setUint32(128, threshold, true) uboView.setFloat32(132, Pow.#WORKLOAD - 1, true) - Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, Pow.#uboBuffer) - Pow.#gl.bufferSubData(Pow.#gl.UNIFORM_BUFFER, 0, uboView) - Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, null) + const uboBuffer = this.#device.createBuffer({ + size: uboView.byteLength, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + }) + this.#device.queue.writeBuffer(uboBuffer, 0, uboView) - // Draw output until success or progressCallback says to stop + // Work buffer const work = new Uint8Array(8) - const draw = (): void => { - if (Pow.#gl == null) throw new Error('WebGL 2 is required') - if (Pow.#query == null) throw new Error('WebGL 2 is required to run queries') - Pow.#gl.clear(Pow.#gl.COLOR_BUFFER_BIT) - - // Upload work buffer - crypto.getRandomValues(work) - Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, Pow.#workBuffer) - Pow.#gl.bufferSubData(Pow.#gl.UNIFORM_BUFFER, 0, Uint32Array.from(work)) - Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, null) - - Pow.#gl.beginQuery(Pow.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE, Pow.#query) - Pow.#gl.drawArrays(Pow.#gl.TRIANGLES, 0, 6) - Pow.#gl.endQuery(Pow.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE) - - requestAnimationFrame(checkQueryResult) - } + crypto.getRandomValues(work) + const workBuffer = this.#device.createBuffer({ + size: work.byteLength, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }) + this.#device.queue.writeBuffer(workBuffer, 0, work) + + const bindGroup = this.#device.createBindGroup({ + layout: this.#bindGroupLayout!, + entries: [ + { + binding: 0, + resource: { + buffer: uboBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: workBuffer, + }, + }, + ], + }) - function checkQueryResult () { - if (Pow.#gl == null) throw new Error('WebGL 2 is required to check query results') - if (Pow.#query == null) throw new Error('Query not found') - if (Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT_AVAILABLE)) { - const anySamplesPassed = Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT) - if (anySamplesPassed) { - // A valid nonce was found - readBackResult() - } else { - // No valid nonce found, start the next draw call - requestAnimationFrame(draw) - } - } else { - // Query result not yet available, check again in the next frame - requestAnimationFrame(checkQueryResult) - } - } - function readBackResult () { - if (Pow.#gl == null) throw new Error('WebGL 2 is required to check read results') - Pow.#gl.readPixels(0, 0, Pow.#gl.drawingBufferWidth, Pow.#gl.drawingBufferHeight, Pow.#gl.RGBA, Pow.#gl.UNSIGNED_BYTE, Pow.#pixels) - // Check the pixels for any success - for (let i = 0; i < Pow.#pixels.length; i += 4) { - if (Pow.#pixels[i] !== 0) { - const hex = Pow.#hexify(work.subarray(4, 8)) + Pow.#hexify([ - Pow.#pixels[i + 2], - Pow.#pixels[i + 3], - work[2] ^ (Pow.#pixels[i] - 1), - work[3] ^ (Pow.#pixels[i + 1] - 1) - ]) - // Return the work value with the custom bits - typeof callback === 'function' && callback(hex) - return - } + + const commandEncoder = this.#device.createCommandEncoder() + const passEncoder = commandEncoder.beginComputePass() + passEncoder.setPipeline(this.#pipeline!) + passEncoder.setBindGroup(0, bindGroup) + + const dispatchSizeX = Math.ceil(this.#WORKLOAD / this.#workgroupSize) + const dispatchSizeY = Math.ceil(this.#WORKLOAD / this.#workgroupSize) + + passEncoder.dispatchWorkgroups(dispatchSizeX, dispatchSizeY) + passEncoder.end() + + // Get result + const resultStagingBuffer = this.#device.createBuffer({ + size: workBuffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ + }) + commandEncoder.copyBufferToBuffer( + workBuffer, + 0, + resultStagingBuffer, + 0, + workBuffer.size + ) + this.#device.queue.submit([commandEncoder.finish()]) + + // Get result + await resultStagingBuffer.mapAsync(GPUMapMode.READ) + + const arrayBuffer = resultStagingBuffer.getMappedRange() + const result = new Uint32Array(arrayBuffer) + resultStagingBuffer.unmap() // Unmap after reading + + for (let i = 0; i < result.length; i += 4) { + if (result[i] !== 0) { + const hex = this.#hexify(work.subarray(4, 8)) + this.#hexify([ + result[i + 2], + result[i + 3], + work[2] ^ (result[i] - 1), + work[3] ^ (result[i + 1] - 1) + ]) + typeof callback === 'function' && callback(hex) + return } } - draw() + // No result found. Redraw + requestAnimationFrame(() => this.#calculate(hashHex, callback, threshold)) } } -- 2.34.1