zoso.dev Git - libnemo.git/commitdiff
Fix invocation ID construction. Add "found" flag as an atomic. Remove randomized...
author Chris Duncan <chris@zoso.dev>
Thu, 2 Jan 2025 16:33:37 +0000 (08:33 -0800)
committer Chris Duncan <chris@zoso.dev>
Thu, 2 Jan 2025 16:33:37 +0000 (08:33 -0800)
src/lib/workers/powgpu.ts

index 7d7774db8c3de7afcf3a5433d4dca0e761eceef5..69759ab99a9fc75bb0d7af4521b8ed911a13a26e 100644 (file)
@@ -45,11 +45,15 @@ export class PowGpu extends WorkerInterface {
        static #shader = `
                struct UBO {
                        blockhash: array<vec4<u32>, 2>,
-                       rand: vec2<u32>,
                        threshold: u32
                };
                @group(0) @binding(0) var<uniform> ubo: UBO;
-               @group(0) @binding(1) var<storage, read_write> work: vec3<u32>;
+
+               struct WORK {
+                       nonce: vec2<u32>,
+                       found: atomic<u32>
+               };
+               @group(0) @binding(1) var<storage, read_write> work: WORK;
 
                /**
                * Defined separately from uint v[32] below as the original value is required
@@ -141,14 +145,17 @@ export class PowGpu extends WorkerInterface {
                * Last 4 bytes are defined by index of each thread
                */
                @compute @workgroup_size(256)
-               fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
-                       work.x = 0u;
-                       work.y = 0u;
-                       work.z = 0u;
-
+               fn main(
+                       @builtin(workgroup_id) workgroup_id: vec3<u32>,
+                       @builtin(local_invocation_id) local_id: vec3<u32>
+               ) {
+                       var id: u32 = ((workgroup_id.x & 0xff) << 24) |
+                                                                               ((workgroup_id.y & 0xff) << 16) |
+                                                                               ((workgroup_id.z & 0xff) << 8) |
+                                                                               (local_id.x & 0xff);
                        var m: array<u32, 16>;
                        m[0u] = 0u;
-                       m[1u] = global_id.x;
+                       m[1u] = id;
                        m[2u] = ubo.blockhash[0u].x;
                        m[3u] = ubo.blockhash[0u].y;
                        m[4u] = ubo.blockhash[0u].z;
@@ -159,7 +166,7 @@ export class PowGpu extends WorkerInterface {
                        m[9u] = ubo.blockhash[1u].w;
 
                        var i: u32 = 0u;
-                       while (work.x == 0u) {
+                       while (atomicLoad(&work.found) == 0u) {
                                m[0u] = i;
                                i = i + 1u;
 
@@ -198,23 +205,16 @@ export class PowGpu extends WorkerInterface {
                                }
 
                                // Store the result directly into work array
-                               if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
-                                       work.x = 1u;
-                                       work.y = m[0u];
-                                       work.z = m[1u];
+                               if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0) {
+                                       atomicStore(&work.found, 1u);
+                                       work.nonce.x = i;
+                                       work.nonce.y = id;
+                                       return;
                                }
                        }
                }
        `;
 
-       static #hexify (arr: number[] | Uint8Array): string {
-               let out = ''
-               for (let i = arr.length - 1; i >= 0; i--) {
-                       out += arr[i].toString(16).padStart(2, '0')
-               }
-               return out
-       }
-
        // Initialize WebGPU
        static #device: GPUDevice | null = null
        static #gpuBuffer: GPUBuffer
@@ -300,11 +300,7 @@ export class PowGpu extends WorkerInterface {
                        const uint32 = hashHex.slice(i, i + 8)
                        uboView.setUint32(i / 2, parseInt(uint32, 16))
                }
-               const rand = crypto.getRandomValues(new Uint32Array(2))
-               console.log(rand)
-               uboView.setUint32(32, rand[0], true)
-               uboView.setUint32(36, rand[1], true)
-               uboView.setUint32(40, threshold, true)
+               uboView.setUint32(32, threshold, true)
                const uboBuffer = PowGpu.#device.createBuffer({
                        size: uboView.byteLength,
                        usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
@@ -337,16 +333,16 @@ export class PowGpu extends WorkerInterface {
                // Issue commands and end render pass
                passEncoder.setPipeline(PowGpu.#pipeline)
                passEncoder.setBindGroup(0, bindGroup)
-               passEncoder.dispatchWorkgroups(256, 256, 256)
+               passEncoder.dispatchWorkgroups(256)
                passEncoder.end()
 
-               // Copy result from GPU buffer to CPU buffer
+               // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
                commandEncoder.copyBufferToBuffer(
                        PowGpu.#gpuBuffer,
                        0,
                        PowGpu.#cpuBuffer,
                        0,
-                       16
+                       12
                )
 
                // End frame by passing array of command buffers to command queue for execution
@@ -355,11 +351,16 @@ export class PowGpu extends WorkerInterface {
                // Read results back to Javascript and then unmap buffer after reading
                await PowGpu.#cpuBuffer.mapAsync(GPUMapMode.READ)
                await PowGpu.#device.queue.onSubmittedWorkDone()
-               const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()).slice()
+               const data = new DataView(PowGpu.#cpuBuffer.getMappedRange())
+               const nonce = data.getBigUint64(0, true)
+               const found = !!data.getUint32(8)
+               console.log(new Uint32Array(data.buffer))
                PowGpu.#cpuBuffer.unmap()
 
-               if (result[0] !== 0) {
-                       const hex = PowGpu.#hexify([result[1], result[2]])
+               console.log(`found: ${found}`)
+               console.log(`nonce: ${nonce}`)
+               if (found) {
+                       const hex = nonce.toString(16)
                        typeof callback === 'function' && callback(hex)
                        return
                }