]> zoso.dev Git - libnemo.git/commitdiff
Initial WebGPU conversion.
authorChris Duncan <chris@zoso.dev>
Tue, 31 Dec 2024 01:53:55 +0000 (17:53 -0800)
committerChris Duncan <chris@zoso.dev>
Tue, 31 Dec 2024 01:53:55 +0000 (17:53 -0800)
perf/block.perf.js
src/lib/block.ts
src/lib/workers.ts
src/lib/workers/powgpu.ts

index 0fa13373e0e0c9a042872dd51841b697ba4f2e00..79658c5f0f3e7e07969e6b207ddd82a8863d888b 100644 (file)
@@ -9,7 +9,7 @@ import { SendBlock } from '#dist/main.js'
 import 'nano-webgl-pow'
 
 await suite('Block performance', async () => {
-       const COUNT = 0x10
+       const COUNT = 0x1
 
        await test(`Customized PoW: Time to calculate proof-of-work for a send block ${COUNT} times`, async () => {
                const times = []
index da23d562e7b3949a9d4cc9d126c0757f1c57f220..83b4cdd346f8508fbe4f5d90481a9a0b050d0af1 100644 (file)
@@ -8,7 +8,7 @@ import { dec, hex } from './convert.js'
 import { NanoNaCl } from './workers/nano-nacl.js'
 import { Pool } from './pool.js'
 import { Rpc } from './rpc.js'
-import { Pow } from './workers.js'
+import { Pow, PowGpu } from './workers.js'
 
 /**
 * Represents a block as defined by the Nano cryptocurrency protocol. The Block
index 017025fabdf270c6e7dc0fb68b8ea058bb8ab675..b67290d29c7ba8b9ff83b97615fdcef292b49b0a 100644 (file)
@@ -3,5 +3,6 @@
 import { default as Bip44Ckd } from './workers/bip44-ckd.js'
 import { default as NanoNaCl } from './workers/nano-nacl.js'
 import { default as Pow } from './workers/powgl.js'
+import { default as PowGpu } from './workers/powgpu.js'
 
-export { Bip44Ckd, NanoNaCl, Pow }
+export { Bip44Ckd, NanoNaCl, Pow, PowGpu }
index 5f7733a40fec84343f6e9dc5d5f02ecdebec0a3f..a52cd5987d328f65cf9f172bec40f05edb7b01a6 100644 (file)
@@ -2,12 +2,14 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 // Based on nano-webgl-pow by Ben Green (numtel) <ben@latenightsketches.com>
 // https://github.com/numtel/nano-webgl-pow
+/// <reference types="@webgpu/types" />
 import { WorkerInterface } from '../pool.js'
 
 export class Pow extends WorkerInterface {
        static {
                Pow.listen()
        }
+
        /**
        * Calculates proof-of-work as described by the Nano cryptocurrency protocol.
        *
@@ -34,197 +36,11 @@ export class Pow extends WorkerInterface {
        * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
        */
        static async find (hash: string, threshold: number = 0xfffffff8): Promise<string> {
-               return new Promise<string>(resolve => {
-                       this.#calculate(hash, resolve, threshold)
+               return new Promise<string>(async (resolve) => {
+                       await this.#calculate(hash, resolve, threshold)
                })
        }
 
-       // Vertex Shader
-       static #vsSource = `#version 300 es
-#pragma vscode_glsllint_stage: vert
-precision highp float;
-layout (location=0) in vec4 position;
-layout (location=1) in vec2 uv;
-
-out vec2 uv_pos;
-
-void main() {
-       uv_pos = uv;
-       gl_Position = position;
-}`
-
-       // Fragment shader
-       static #fsSource = `#version 300 es
-#pragma vscode_glsllint_stage: frag
-precision highp float;
-precision highp int;
-
-in vec2 uv_pos;
-out vec4 fragColor;
-
-// blockhash - array of precalculated block hash components
-// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else
-// workload - Defines canvas size
-layout(std140) uniform UBO {
-       uint blockhash[8];
-       uint threshold;
-       float workload;
-};
-
-// Random work values
-// First 2 bytes will be overwritten by texture pixel position
-// Second 2 bytes will be modified if the canvas size is greater than 256x256
-// Last 4 bytes remain as generated externally
-layout(std140) uniform WORK {
-       uvec4 work[2];
-};
-
-// Defined separately from uint v[32] below as the original value is required
-// to calculate the second uint32 of the digest for threshold comparison
-#define BLAKE2B_IV32_1 0x6A09E667u
-
-// Both buffers represent 16 uint64s as 32 uint32s
-// because that's what GLSL offers, just like Javascript
-
-// Compression buffer, intialized to 2 instances of the initialization vector
-// The following values have been modified from the BLAKE2B_IV:
-// OUTLEN is constant 8 bytes
-// v[0] ^= 0x01010000u ^ uint(OUTLEN);
-// INLEN is constant 40 bytes: work value (8) + block hash (32)
-// v[24] ^= uint(INLEN);
-// It's always the "last" compression at this INLEN
-// v[28] = ~v[28];
-// v[29] = ~v[29];
-uint v[32] = uint[32](
-       0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
-       0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
-       0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
-       0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
-       0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
-       0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
-       0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
-       0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
-);
-// Input data buffer
-uint m[32];
-
-// These are offsets into the input data buffer for each mixing step.
-// They are multiplied by 2 from the original SIGMA values in
-// the C reference implementation, which refered to uint64s.
-const int SIGMA82[192] = int[192](
-       0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
-       28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6,
-       22,16,24,0,10,4,30,26,20,28,6,12,14,2,18,8,
-       14,18,6,2,26,24,22,28,4,12,10,20,8,0,30,16,
-       18,0,10,14,4,8,20,30,28,2,22,24,12,16,6,26,
-       4,24,12,20,0,22,16,6,8,26,14,10,30,28,2,18,
-       24,10,2,30,28,26,8,20,0,14,12,6,18,4,16,22,
-       26,22,14,28,24,2,6,18,10,0,30,8,16,12,4,20,
-       12,30,28,18,22,6,0,16,24,4,26,14,2,8,20,10,
-       20,4,16,8,14,12,2,10,30,22,18,28,6,24,26,0,
-       0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
-       28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6
-);
-
-// 64-bit unsigned addition within the compression buffer
-// Sets v[a,a+1] += b
-// b0 is the low 32 bits of b, b1 represents the high 32 bits
-void add_uint64 (int a, uint b0, uint b1) {
-       uint o0 = v[a] + b0;
-       uint o1 = v[a + 1] + b1;
-       if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow?
-               o1++;
-       }
-       v[a] = o0;
-       v[a + 1] = o1;
-}
-
-// G Mixing function
-void B2B_G (int a, int b, int c, int d, int ix, int iy) {
-       add_uint64(a, v[b], v[b+1]);
-       add_uint64(a, m[ix], m[ix + 1]);
-
-       // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
-       uint xor0 = v[d] ^ v[a];
-       uint xor1 = v[d + 1] ^ v[a + 1];
-       v[d] = xor1;
-       v[d + 1] = xor0;
-
-       add_uint64(c, v[d], v[d+1]);
-
-       // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
-       xor0 = v[b] ^ v[c];
-       xor1 = v[b + 1] ^ v[c + 1];
-       v[b] = (xor0 >> 24) ^ (xor1 << 8);
-       v[b + 1] = (xor1 >> 24) ^ (xor0 << 8);
-
-       add_uint64(a, v[b], v[b+1]);
-       add_uint64(a, m[iy], m[iy + 1]);
-
-       // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
-       xor0 = v[d] ^ v[a];
-       xor1 = v[d + 1] ^ v[a + 1];
-       v[d] = (xor0 >> 16) ^ (xor1 << 16);
-       v[d + 1] = (xor1 >> 16) ^ (xor0 << 16);
-
-       add_uint64(c, v[d], v[d+1]);
-
-       // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
-       xor0 = v[b] ^ v[c];
-       xor1 = v[b + 1] ^ v[c + 1];
-       v[b] = (xor1 >> 31) ^ (xor0 << 1);
-       v[b + 1] = (xor0 >> 31) ^ (xor1 << 1);
-}
-
-void main() {
-       int i;
-       uvec4 u_work0 = work[0];
-       uvec4 u_work1 = work[1];
-       uint uv_x = uint(uv_pos.x * workload);
-       uint uv_y = uint(uv_pos.y * workload);
-       uint x_pos = uv_x % 256u;
-       uint y_pos = uv_y % 256u;
-       uint x_index = (uv_x - x_pos) / 256u;
-       uint y_index = (uv_y - y_pos) / 256u;
-
-       // First 2 work bytes are the x,y pos within the 256x256 area, the next
-       // two bytes are modified from the random generated value, XOR'd with
-       // the x,y area index of where this pixel is located
-       m[0] = (x_pos ^ (y_pos << 8) ^ ((u_work0.b ^ x_index) << 16) ^ ((u_work0.a ^ y_index) << 24));
-
-       // Remaining bytes are un-modified from the random generated value
-       m[1] = (u_work1.r ^ (u_work1.g << 8) ^ (u_work1.b << 16) ^ (u_work1.a << 24));
-
-       // Block hash
-       for (i=0;i<8;i++) {
-               m[i+2] = blockhash[i];
-       }
-
-       // twelve rounds of mixing
-       for(i=0;i<12;i++) {
-               B2B_G(0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
-               B2B_G(2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
-               B2B_G(4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
-               B2B_G(6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
-               B2B_G(0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
-               B2B_G(2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
-               B2B_G(4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
-               B2B_G(6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
-       }
-
-       // Pixel data is multipled by threshold test result (0 or 1)
-       // First 4 bytes insignificant, only calculate digest of second 4 bytes
-       if ((BLAKE2B_IV32_1 ^ v[1] ^ v[17]) > threshold) {
-               fragColor = vec4(
-                       float(x_index + 1u)/255., // +1 to distinguish from 0 (unsuccessful) pixels
-                       float(y_index + 1u)/255., // Same as previous
-                       float(x_pos)/255., // Return the 2 custom bytes used in work value
-                       float(y_pos)/255.  // Second custom byte
-               );
-       } else {
-               discard;
-       }
-}`
 
        /** Used to set canvas size. Must be a multiple of 256. */
        static #WORKLOAD: number = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency))
@@ -237,95 +53,246 @@ void main() {
                return out
        }
 
-       static #gl: WebGL2RenderingContext | null
-       static #program: WebGLProgram | null
-       static #vertexShader: WebGLShader | null
-       static #fragmentShader: WebGLShader | null
-       static #positionBuffer: WebGLBuffer | null
-       static #uvBuffer: WebGLBuffer | null
-       static #uboBuffer: WebGLBuffer | null
-       static #workBuffer: WebGLBuffer | null
-       static #query: WebGLQuery | null
-       static #pixels: Uint8Array
-       // Vertex Positions, 2 triangles
-       static #positions = new Float32Array([
-               -1, -1, 0, -1, 1, 0, 1, 1, 0,
-               1, -1, 0, 1, 1, 0, -1, -1, 0
-       ])
-       // Texture Positions
-       static #uvPosArray = new Float32Array([
-               1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1
-       ])
-
-       // Compile
-       static {
-               this.#gl = new OffscreenCanvas(this.#WORKLOAD, this.#WORKLOAD).getContext('webgl2')
-               if (this.#gl == null) throw new Error('WebGL 2 is required')
-               this.#gl.clearColor(0, 0, 0, 1)
-
-               this.#program = this.#gl.createProgram()
-               if (this.#program == null) throw new Error('Failed to create shader program')
-
-               this.#vertexShader = this.#gl.createShader(this.#gl.VERTEX_SHADER)
-               if (this.#vertexShader == null) throw new Error('Failed to create vertex shader')
-               this.#gl.shaderSource(this.#vertexShader, this.#vsSource)
-               this.#gl.compileShader(this.#vertexShader)
-               if (!this.#gl.getShaderParameter(this.#vertexShader, this.#gl.COMPILE_STATUS))
-                       throw new Error(this.#gl.getShaderInfoLog(this.#vertexShader) ?? `Failed to compile vertex shader`)
-
-               this.#fragmentShader = this.#gl.createShader(this.#gl.FRAGMENT_SHADER)
-               if (this.#fragmentShader == null) throw new Error('Failed to create fragment shader')
-               this.#gl.shaderSource(this.#fragmentShader, this.#fsSource)
-               this.#gl.compileShader(this.#fragmentShader)
-               if (!this.#gl.getShaderParameter(this.#fragmentShader, this.#gl.COMPILE_STATUS))
-                       throw new Error(this.#gl.getShaderInfoLog(this.#fragmentShader) ?? `Failed to compile fragment shader`)
-
-               this.#gl.attachShader(this.#program, this.#vertexShader)
-               this.#gl.attachShader(this.#program, this.#fragmentShader)
-               this.#gl.linkProgram(this.#program)
-               if (!this.#gl.getProgramParameter(this.#program, this.#gl.LINK_STATUS))
-                       throw new Error(this.#gl.getProgramInfoLog(this.#program) ?? `Failed to link program`)
-
-               // Construct simple 2D geometry
-               this.#gl.useProgram(this.#program)
-               const triangleArray = this.#gl.createVertexArray()
-               this.#gl.bindVertexArray(triangleArray)
-
-               this.#positionBuffer = this.#gl.createBuffer()
-               this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, this.#positionBuffer)
-               this.#gl.bufferData(this.#gl.ARRAY_BUFFER, this.#positions, this.#gl.STATIC_DRAW)
-               this.#gl.vertexAttribPointer(0, 3, this.#gl.FLOAT, false, 0, 0)
-               this.#gl.enableVertexAttribArray(0)
-
-               this.#uvBuffer = this.#gl.createBuffer()
-               this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, this.#uvBuffer)
-               this.#gl.bufferData(this.#gl.ARRAY_BUFFER, this.#uvPosArray, this.#gl.STATIC_DRAW)
-               this.#gl.vertexAttribPointer(1, 2, this.#gl.FLOAT, false, 0, 0)
-               this.#gl.enableVertexAttribArray(1)
-
-               this.#uboBuffer = this.#gl.createBuffer()
-               this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#uboBuffer)
-               this.#gl.bufferData(this.#gl.UNIFORM_BUFFER, 144, this.#gl.DYNAMIC_DRAW)
-               this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null)
-               this.#gl.bindBufferBase(this.#gl.UNIFORM_BUFFER, 0, this.#uboBuffer)
-               this.#gl.uniformBlockBinding(this.#program, this.#gl.getUniformBlockIndex(this.#program, 'UBO'), 0)
-
-               this.#workBuffer = this.#gl.createBuffer()
-               this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#workBuffer)
-               this.#gl.bufferData(this.#gl.UNIFORM_BUFFER, 32, this.#gl.STREAM_DRAW)
-               this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null)
-               this.#gl.bindBufferBase(this.#gl.UNIFORM_BUFFER, 1, this.#workBuffer)
-               this.#gl.uniformBlockBinding(this.#program, this.#gl.getUniformBlockIndex(this.#program, 'WORK'), 1)
-
-               this.#pixels = new Uint8Array(this.#gl.drawingBufferWidth * this.#gl.drawingBufferHeight * 4)
-               this.#query = this.#gl.createQuery()
+       static #device: GPUDevice | null = null;
+       static #pipeline: GPUComputePipeline | null = null;
+       static #workgroupSize: number = 256; // Must align with shader
+       static #bindGroupLayout: GPUBindGroupLayout | null = null;
+
+
+       // WebGPU Compute Shader
+       static #computeShader = `
+               @group(0) @binding(0) var<uniform> ubo: UBO;
+               @group(0) @binding(1) var<storage, read_write> work: array<vec4<u32>, 2>;
+
+               struct UBO {
+                       blockhash: array<vec4<u32>, 2>,
+                       threshold: u32,
+                       workload: f32,
+               };
+
+               /**
+               * Defined separately from uint v[32] below as the original value is required
+               * to calculate the second uint32 of the digest for threshold comparison
+               */
+               const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
+
+               /**
+               * These are offsets into the input data buffer for each mixing step.
+               * They are multiplied by 2 from the original SIGMA values in
+               * the C reference implementation, which refered to uint64s.
+               */
+               const SIGMA82: array<u32, 192> = array<u32, 192>(
+                       0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
+                       28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6,
+                       22,16,24,0,10,4,30,26,20,28,6,12,14,2,18,8,
+                       14,18,6,2,26,24,22,28,4,12,10,20,8,0,30,16,
+                       18,0,10,14,4,8,20,30,28,2,22,24,12,16,6,26,
+                       4,24,12,20,0,22,16,6,8,26,14,10,30,28,2,18,
+                       24,10,2,30,28,26,8,20,0,14,12,6,18,4,16,22,
+                       26,22,14,28,24,2,6,18,10,0,30,8,16,12,4,20,
+                       12,30,28,18,22,6,0,16,24,4,26,14,2,8,20,10,
+                       20,4,16,8,14,12,2,10,30,22,18,28,6,24,26,0,
+                       0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
+                       28,20,8,16,18,30,26,12,2,24,0,4,22,14,10,6
+               );
+
+               /**
+               * 64-bit unsigned addition within the compression buffer
+               * Sets v[a,a+1] += b
+               * b0 is the low 32 bits of b, b1 represents the high 32 bits
+               */
+               fn add_uint64 (vv: ptr<function, array<u32, 32>>, a: u32, b0: u32, b1: u32) {
+                       var v: array<u32, 32> = (*vv);
+                       var o0: u32 = v[a] + b0;
+                       var o1: u32 = v[a+1u] + b1;
+                       if (v[a] > 0xFFFFFFFFu - b0) { // did low 32 bits overflow?
+                               o1 = o1 + 1u;
+                       }
+                       v[a] = o0;
+                       v[a+1u] = o1;
+               }
+
+               /**
+               * G Mixing function
+               */
+               fn B2B_G (ptr_v: ptr<function, array<u32, 32>>, ptr_m: ptr<function, array<u32, 16>>, a: u32, b: u32, c: u32, d: u32, ix: u32, iy: u32) {
+                       var v: array<u32, 32> = (*ptr_v);
+                       var m: array<u32, 16> = (*ptr_m);
+                       add_uint64(ptr_v, a, v[b], v[b+1u]);
+                       add_uint64(ptr_v, a, m[ix], m[ix+1u]);
+
+                       // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated to the right by 32 bits
+                       var xor0: u32 = v[d] ^ v[a];
+                       var xor1: u32 = v[d+1u] ^ v[a+1u];
+                       v[d] = xor1;
+                       v[d+1u] = xor0;
+
+                       add_uint64(ptr_v, c, v[d], v[d+1u]);
+
+                       // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 24 bits
+                       xor0 = v[b] ^ v[c];
+                       xor1 = v[b+1u] ^ v[c+1u];
+                       v[b] = (xor0 >> 24u) ^ (xor1 << 8u);
+                       v[b+1u] = (xor1 >> 24u) ^ (xor0 << 8u);
+
+                       add_uint64(ptr_v, a, v[b], v[b+1u]);
+                       add_uint64(ptr_v, a, m[iy], m[iy+1u]);
+
+                       // v[d,d+1] = (v[d,d+1] xor v[a,a+1]) rotated right by 16 bits
+                       xor0 = v[d] ^ v[a];
+                       xor1 = v[d+1u] ^ v[a+1u];
+                       v[d] = (xor0 >> 16u) ^ (xor1 << 16u);
+                       v[d+1u] = (xor1 >> 16u) ^ (xor0 << 16u);
+
+                       add_uint64(ptr_v, c, v[d], v[d+1u]);
+
+                       // v[b,b+1] = (v[b,b+1] xor v[c,c+1]) rotated right by 63 bits
+                       xor0 = v[b] ^ v[c];
+                       xor1 = v[b+1u] ^ v[c+1u];
+                       v[b] = (xor1 >> 31u) ^ (xor0 << 1u);
+                       v[b+1u] = (xor0 >> 31u) ^ (xor1 << 1u);
+               }
+
+               /**
+               * Main compute function
+               */
+               @compute @workgroup_size(${this.#workgroupSize})
+               fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
+                       let uv_x = global_id.x;
+                       let uv_y = global_id.y;
+
+                       // Check bounds, may be unnecessary with proper dispatch size
+                       if (uv_x >= u32(ubo.workload) || uv_y >= u32(ubo.workload) ) {
+                               return;
+                       }
+
+                       var m: array<u32, 16>;
+                       var u_work0: vec4<u32> = work[0u];
+                       var u_work1: vec4<u32> = work[1u];
+                       let x_pos = uv_x % 256u;
+                       let y_pos = uv_y % 256u;
+                       let x_index = (uv_x - x_pos) / 256u;
+                       let y_index = (uv_y - y_pos) / 256u;
+
+                       // First 2 work bytes are the x,y pos within the 256x256 area, the next
+                       // two bytes are modified from the random generated value, XOR'd with
+                       // the x,y area index of where this pixel is located
+                       m[0u] = (x_pos ^ (y_pos << 8u) ^ ((u_work0.b ^ x_index) << 16u) ^ ((u_work0.a ^ y_index) << 24u));
+
+                       // Remaining bytes are un-modified from the random generated value
+                       m[1u] = (u_work1.r ^ (u_work1.g << 8u) ^ (u_work1.b << 16u) ^ (u_work1.a << 24u));
+
+                       // Block hash
+                       m[2u] = ubo.blockhash[0u].x;
+                       m[3u] = ubo.blockhash[0u].y;
+                       m[4u] = ubo.blockhash[0u].z;
+                       m[5u] = ubo.blockhash[0u].w;
+                       m[6u] = ubo.blockhash[1u].x;
+                       m[7u] = ubo.blockhash[1u].y;
+                       m[8u] = ubo.blockhash[1u].z;
+                       m[9u] = ubo.blockhash[1u].w;
+
+                       /**
+                       * Compression buffer, intialized to 2 instances of the initialization vector
+                       * The following values have been modified from the BLAKE2B_IV:
+                       * OUTLEN is constant 8 bytes
+                       * v[0] ^= 0x01010000u ^ uint(OUTLEN);
+                       * INLEN is constant 40 bytes: work value (8) + block hash (32)
+                       * v[24] ^= uint(INLEN);
+                       * It's always the "last" compression at this INLEN
+                       * v[28] = ~v[28];
+                       * v[29] = ~v[29];
+                       */
+                       var v = array<u32, 32>(
+                               0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+                               0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+                               0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+                               0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+                               0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+                               0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+                               0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+                               0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+                       );
+
+                       // twelve rounds of mixing
+                       for (var i: u32 = 0u; i < 12u; i = i + 1u) {
+                               B2B_G(&v, &m, 0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
+                               B2B_G(&v, &m, 2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
+                               B2B_G(&v, &m, 4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
+                               B2B_G(&v, &m, 6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
+                               B2B_G(&v, &m, 0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
+                               B2B_G(&v, &m, 2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
+                               B2B_G(&v, &m, 4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
+                               B2B_G(&v, &m, 6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
+                       }
+
+                       // Store the result directly into work array
+                       if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
+                               work[0u].x = x_index + 1u;
+                               work[0u].y = y_index + 1u;
+                               work[0u].z = x_pos;
+                               work[0u].w = y_pos;
+                       }
+               }
+       `;
+
+       // Initialize WebGPU
+       static async #initializeWebGPU (): Promise<void> {
+               if (!navigator.gpu) {
+                       throw new Error("WebGPU is not supported.")
+               }
+
+               const adapter = await navigator.gpu.requestAdapter()
+               if (!adapter) {
+                       throw new Error("No suitable WebGPU adapter found.")
+               }
+
+               this.#device = await adapter.requestDevice()
+               this.#bindGroupLayout = this.#device.createBindGroupLayout({
+                       entries: [
+                               {
+                                       binding: 0,
+                                       visibility: GPUShaderStage.COMPUTE,
+                                       buffer: { type: 'uniform', },
+                               },
+                               {
+                                       binding: 1,
+                                       visibility: GPUShaderStage.COMPUTE,
+                                       buffer: { type: 'storage', },
+                               },
+                       ],
+               })
+
+               this.#pipeline = this.#device.createComputePipeline({
+                       layout: this.#device.createPipelineLayout({
+                               bindGroupLayouts: [this.#bindGroupLayout],
+                       }),
+                       compute: {
+                               module: this.#device.createShaderModule({
+                                       code: this.#computeShader,
+                               }),
+                               entryPoint: 'main',
+                       },
+               })
        }
 
-       static #calculate (hashHex: string, callback: (nonce: string | PromiseLike<string>) => any, threshold: number): void {
-               if (Pow.#gl == null) throw new Error('WebGL 2 is required')
+
+       static async #calculate (hashHex: string, callback: (nonce: string | PromiseLike<string>) => any, threshold: number): Promise<void> {
+
                if (!/^[A-F-a-f0-9]{64}$/.test(hashHex)) throw new Error(`invalid_hash ${hashHex}`)
                if (typeof threshold !== 'number') throw new TypeError(`Invalid threshold ${threshold}`)
-               if (this.#gl == null) throw new Error('WebGL 2 is required')
+
+               // Ensure WebGPU is initialized. Call this once
+               if (!this.#device) {
+                       this.#initializeWebGPU()
+                               .then(() => {
+                                       this.#calculate(hashHex, callback, threshold) // restart calculation
+                               })
+                               .catch((error) => {
+                                       console.error("Failed to initalize WebGPU:", error)
+                               })
+                       return // Stop execution until WebGPU is initalized
+               }
 
                // Set up uniform buffer object
                const uboView = new DataView(new ArrayBuffer(144))
@@ -335,66 +302,86 @@ void main() {
                }
                uboView.setUint32(128, threshold, true)
                uboView.setFloat32(132, Pow.#WORKLOAD - 1, true)
-               Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, Pow.#uboBuffer)
-               Pow.#gl.bufferSubData(Pow.#gl.UNIFORM_BUFFER, 0, uboView)
-               Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, null)
+               const uboBuffer = this.#device.createBuffer({
+                       size: uboView.byteLength,
+                       usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
+               })
+               this.#device.queue.writeBuffer(uboBuffer, 0, uboView)
 
-               // Draw output until success or progressCallback says to stop
+               // Work buffer
                const work = new Uint8Array(8)
-               const draw = (): void => {
-                       if (Pow.#gl == null) throw new Error('WebGL 2 is required')
-                       if (Pow.#query == null) throw new Error('WebGL 2 is required to run queries')
-                       Pow.#gl.clear(Pow.#gl.COLOR_BUFFER_BIT)
-
-                       // Upload work buffer
-                       crypto.getRandomValues(work)
-                       Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, Pow.#workBuffer)
-                       Pow.#gl.bufferSubData(Pow.#gl.UNIFORM_BUFFER, 0, Uint32Array.from(work))
-                       Pow.#gl.bindBuffer(Pow.#gl.UNIFORM_BUFFER, null)
-
-                       Pow.#gl.beginQuery(Pow.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE, Pow.#query)
-                       Pow.#gl.drawArrays(Pow.#gl.TRIANGLES, 0, 6)
-                       Pow.#gl.endQuery(Pow.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE)
-
-                       requestAnimationFrame(checkQueryResult)
-               }
+               crypto.getRandomValues(work)
+               const workBuffer = this.#device.createBuffer({
+                       size: work.byteLength,
+                       usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+               })
+               this.#device.queue.writeBuffer(workBuffer, 0, work)
+
+               const bindGroup = this.#device.createBindGroup({
+                       layout: this.#bindGroupLayout!,
+                       entries: [
+                               {
+                                       binding: 0,
+                                       resource: {
+                                               buffer: uboBuffer,
+                                       },
+                               },
+                               {
+                                       binding: 1,
+                                       resource: {
+                                               buffer: workBuffer,
+                                       },
+                               },
+                       ],
+               })
 
-               function checkQueryResult () {
-                       if (Pow.#gl == null) throw new Error('WebGL 2 is required to check query results')
-                       if (Pow.#query == null) throw new Error('Query not found')
-                       if (Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT_AVAILABLE)) {
-                               const anySamplesPassed = Pow.#gl.getQueryParameter(Pow.#query, Pow.#gl.QUERY_RESULT)
-                               if (anySamplesPassed) {
-                                       // A valid nonce was found
-                                       readBackResult()
-                               } else {
-                                       // No valid nonce found, start the next draw call
-                                       requestAnimationFrame(draw)
-                               }
-                       } else {
-                               // Query result not yet available, check again in the next frame
-                               requestAnimationFrame(checkQueryResult)
-                       }
-               }
-               function readBackResult () {
-                       if (Pow.#gl == null) throw new Error('WebGL 2 is required to check read results')
-                       Pow.#gl.readPixels(0, 0, Pow.#gl.drawingBufferWidth, Pow.#gl.drawingBufferHeight, Pow.#gl.RGBA, Pow.#gl.UNSIGNED_BYTE, Pow.#pixels)
-                       // Check the pixels for any success
-                       for (let i = 0; i < Pow.#pixels.length; i += 4) {
-                               if (Pow.#pixels[i] !== 0) {
-                                       const hex = Pow.#hexify(work.subarray(4, 8)) + Pow.#hexify([
-                                               Pow.#pixels[i + 2],
-                                               Pow.#pixels[i + 3],
-                                               work[2] ^ (Pow.#pixels[i] - 1),
-                                               work[3] ^ (Pow.#pixels[i + 1] - 1)
-                                       ])
-                                       // Return the work value with the custom bits
-                                       typeof callback === 'function' && callback(hex)
-                                       return
-                               }
+
+               const commandEncoder = this.#device.createCommandEncoder()
+               const passEncoder = commandEncoder.beginComputePass()
+               passEncoder.setPipeline(this.#pipeline!)
+               passEncoder.setBindGroup(0, bindGroup)
+
+               const dispatchSizeX = Math.ceil(this.#WORKLOAD / this.#workgroupSize)
+               const dispatchSizeY = Math.ceil(this.#WORKLOAD / this.#workgroupSize)
+
+               passEncoder.dispatchWorkgroups(dispatchSizeX, dispatchSizeY)
+               passEncoder.end()
+
+               // Get result
+               const resultStagingBuffer = this.#device.createBuffer({
+                       size: workBuffer.size,
+                       usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
+               })
+               commandEncoder.copyBufferToBuffer(
+                       workBuffer,
+                       0,
+                       resultStagingBuffer,
+                       0,
+                       workBuffer.size
+               )
+               this.#device.queue.submit([commandEncoder.finish()])
+
+               // Get result
+               await resultStagingBuffer.mapAsync(GPUMapMode.READ)
+
+               const arrayBuffer = resultStagingBuffer.getMappedRange()
+               const result = new Uint32Array(arrayBuffer)
+               resultStagingBuffer.unmap() // Unmap after reading
+
+               for (let i = 0; i < result.length; i += 4) {
+                       if (result[i] !== 0) {
+                               const hex = this.#hexify(work.subarray(4, 8)) + this.#hexify([
+                                       result[i + 2],
+                                       result[i + 3],
+                                       work[2] ^ (result[i] - 1),
+                                       work[3] ^ (result[i + 1] - 1)
+                               ])
+                               typeof callback === 'function' && callback(hex)
+                               return
                        }
                }
-               draw()
+               // No result found. Redraw
+               requestAnimationFrame(() => this.#calculate(hashHex, callback, threshold))
        }
 }