zoso.dev Git - libnemo.git/commitdiff
Clarified limitations of workgroup dispatching and sizes and corrected how workload...
author Chris Duncan <chris@zoso.dev>
Wed, 1 Jan 2025 10:08:33 +0000 (02:08 -0800)
committer Chris Duncan <chris@zoso.dev>
Wed, 1 Jan 2025 10:08:33 +0000 (02:08 -0800)
src/lib/workers/powgpu.ts

index 81bdc57c10c98361014f51331e54043eb657f7dc..7897f26f481245c7ffc4c9c7dfc29dc832ec456b 100644 (file)
@@ -4,7 +4,6 @@
 // https://github.com/numtel/nano-webgl-pow
 /// <reference types="@webgpu/types" />
 import { WorkerInterface } from '../pool.js'
-import powgl from './powgl.js'
 
 export class PowGpu extends WorkerInterface {
        static {
@@ -42,19 +41,15 @@ export class PowGpu extends WorkerInterface {
                })
        }
 
-       static #workDispatchSize: number = Math.max(1, Math.floor(navigator.hardwareConcurrency))
-       static #workgroupSize: number = 256 // Must align with shader
-       static #workload: number = this.#workDispatchSize * this.#workgroupSize
-
        // WebGPU Compute Shader
        static #shader = `
                struct UBO {
                        blockhash: array<vec4<u32>, 2>,
-                       threshold: u32,
-                       workload: u32
+                       rand: vec2<u32>,
+                       threshold: u32
                };
                @group(0) @binding(0) var<uniform> ubo: UBO;
-               @group(0) @binding(1) var<storage, read_write> work: array<vec2<u32>>;
+               @group(0) @binding(1) var<storage, read_write> work: u32;
 
                /**
                * Defined separately from uint v[32] below as the original value is required
@@ -142,18 +137,18 @@ export class PowGpu extends WorkerInterface {
                /**
                * Main compute function
                */
-               @compute @workgroup_size(${this.#workgroupSize}, ${this.#workgroupSize})
-               fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
-                       // Check bounds, may be unnecessary with proper dispatch size
-                       if (global_id.x >= ubo.workload || global_id.y >= ubo.workload) {
-                               return;
-                       }
-
+               @compute @workgroup_size(256)
+               fn main(
+                       @builtin(global_invocation_id) global_id: vec3<u32>,
+                       @builtin(workgroup_id) workgroup_id: vec3<u32>,
+                       @builtin(local_invocation_id) local_id: vec3<u32>
+               ) {
                        var m: array<u32, 16>;
 
-                       // Workgroup index defines work value for this calculation point
-                       m[0u] = global_id.x;
-                       m[1u] = global_id.y;
+                       // 8-byte work is split into two 4-byte u32
+                       // First 3 bytes provided, last byte defined by this compute index
+                       m[0u] = (ubo.rand.x << 8u) ^ workgroup_id.x;
+                       m[1u] = (ubo.rand.y << 8u) ^ local_id.x;
 
                        // Block hash
                        m[2u] = ubo.blockhash[0u].x;
@@ -200,10 +195,9 @@ export class PowGpu extends WorkerInterface {
                        }
 
                        // Store the result directly into work array
-                       // if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0) {
-                               work[global_id.x].x = 1u ^ (2u << 8u);
-                               work[global_id.x].y = 3u ^ (4u << 8u);
-                       // }
+                       if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0u) {
+                               work = global_id.x;
+                       }
                }
        `;
 
@@ -239,11 +233,11 @@ export class PowGpu extends WorkerInterface {
 
                                                // Create buffers for writing GPU calculations and reading from Javascript
                                                this.#gpuBuffer = this.#device.createBuffer({
-                                                       size: this.#workload,
+                                                       size: 16,
                                                        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
                                                })
                                                this.#cpuBuffer = this.#device.createBuffer({
-                                                       size: this.#workload,
+                                                       size: 16,
                                                        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
                                                })
 
@@ -300,8 +294,10 @@ export class PowGpu extends WorkerInterface {
                        const uint32 = hashHex.slice(i, i + 8)
                        uboView.setUint32(i / 2, parseInt(uint32, 16))
                }
-               uboView.setUint32(32, threshold, true)
-               uboView.setUint32(40, PowGpu.#workload, true)
+               const rand = crypto.getRandomValues(new Uint32Array(2))
+               uboView.setUint32(32, rand[0], true)
+               uboView.setUint32(36, rand[1], true)
+               uboView.setUint32(40, threshold, true)
                const uboBuffer = PowGpu.#device.createBuffer({
                        size: uboView.byteLength,
                        usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
@@ -334,7 +330,8 @@ export class PowGpu extends WorkerInterface {
                // Issue commands and end render pass
                passEncoder.setPipeline(PowGpu.#pipeline)
                passEncoder.setBindGroup(0, bindGroup)
-               passEncoder.dispatchWorkgroups(PowGpu.#workDispatchSize)
+               // passEncoder.dispatchWorkgroups(256 * Math.max(1, navigator.hardwareConcurrency))
+               passEncoder.dispatchWorkgroups(256)
                passEncoder.end()
 
                // Copy result from GPU buffer to CPU buffer
@@ -343,7 +340,7 @@ export class PowGpu extends WorkerInterface {
                        0,
                        PowGpu.#cpuBuffer,
                        0,
-                       PowGpu.#workload
+                       16
                )
 
                // End frame by passing array of command buffers to command queue for execution
@@ -351,29 +348,18 @@ export class PowGpu extends WorkerInterface {
 
                // Read results back to Javascript and then unmap buffer after reading
                await PowGpu.#cpuBuffer.mapAsync(GPUMapMode.READ)
-               const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange())
+               await PowGpu.#device.queue.onSubmittedWorkDone()
+               const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()).slice()
                PowGpu.#cpuBuffer.unmap()
 
                console.log(`result`)
                console.dir(result)
-               console.log(`result?`)
-               for (let i = 0; i < result.length; i += 2) {
-                       const work = new Uint8Array([result[i], result[i + 1]])
-                       console.log(`result[${i}]: ${result[i]}`)
-                       console.log(`result[${i + 1}]: ${result[i + 1]}`)
-                       console.log(`work: ${work}`)
-                       if (result[i] !== 0 || result[i + 1] !== 0) {
-                               // const hex = PowGpu.#hexify(work.subarray(4, 8)) + PowGpu.#hexify([
-                               //      result[i + 2],
-                               //      result[i + 3],
-                               //      work[2] ^ (result[i] - 1),
-                               //      work[3] ^ (result[i + 1] - 1)
-                               // ])
-                               const hex = PowGpu.#hexify(work)
-                               typeof callback === 'function' && callback(hex)
-                               return
-                       }
+               if (result[0] !== 0 || result[1] !== 0) {
+                       const hex = PowGpu.#hexify([...result])
+                       typeof callback === 'function' && callback(hex)
+                       return
                }
+
                // No result found. Redraw
                requestAnimationFrame(() => this.#calculate(hashHex, callback, threshold))
        }