zoso.dev Git - libnemo.git/commitdiff
Test how max workgroup size with single dimension dispatch affects mobile because...
author Chris Duncan <chris@zoso.dev>
Fri, 10 Jan 2025 14:53:10 +0000 (06:53 -0800)
committer Chris Duncan <chris@zoso.dev>
Fri, 10 Jan 2025 15:02:59 +0000 (07:02 -0800)
benchmarks.md
global.min.0.js [moved from global.min.js with 99% similarity]
index.html
src/lib/nano-pow/classes/gpu.ts
src/lib/nano-pow/shaders/gpu-compute.ts
test/perf.block.js

index e78b85140e36cf58c2792d0a03a0d5ebfd606689..da54b214ed28b0c2f4a891e0be28c620d1332eac 100644 (file)
@@ -128,6 +128,24 @@ Geometric: 171.8797089689105 ms
 Minimum: 20.80000001192093 ms
 Maximum: 2093.199999988079 ms
 
+libnemo: Time to calculate proof-of-work for a send block 512 times
+(after increasing workgroup_size to 256 from 64 and decreasing dispatch size)
+Total: 149857.0999999717 ms
+Average: 292.6896484374447 ms
+Harmonic: 73.49751645489904 ms
+Geometric: 174.9560632035056 ms
+Minimum: 2.600000001490116 ms
+Maximum: 2364.5999999940395 ms
+
+GLOBALS.mjs:46 PASS  libnemo: Time to calculate proof-of-work for a send block 512 times
+(after increasing workgroup_size to 256 from 64 and decreasing dispatch size)
+Total: 145201.70000004023 ms
+Average: 283.5970703125786 ms
+Harmonic: 54.2894989554052 ms
+Geometric: 155.58659283933008 ms
+Minimum: 2.5 ms
+Maximum: 1812.3999999985099 ms
+
 
 
 PowGpu: Time to calculate proof-of-work for a send block 32 times
similarity index 99%
rename from global.min.js
rename to global.min.0.js
index 1d9336f3113d20a7666a9954bec38cabf94603ba..50114ccd38f0db4933a1185fb9c1960da1de6c69 100644 (file)
@@ -3265,28 +3265,17 @@ const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
 * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
 * UBO. High 4 bytes are the random value XOR'd with index of each thread.
 */
-@compute @workgroup_size(64)
-fn main(
-       @builtin(workgroup_id) workgroup_id: vec3<u32>,
-       @builtin(local_invocation_id) local_id: vec3<u32>
-) {
+@compute @workgroup_size(256)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
        if (atomicLoad(&work.found) != 0u) { return; }
 
        let threshold: u32 = ubo.threshold;
 
-       /**
-       * Flatten 3D workgroup and local identifiers into u32 for each thread
-       */
-       var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
-               ((workgroup_id.y & 0xFFu) << 16u) |
-               ((workgroup_id.z & 0xFFu) << 8u) |
-               (local_id.x & 0xFFu);
-
        /**
        * Initialize (nonce||blockhash) concatenation
        */
-       var m0: u32 = ubo.random;
-       var m1: u32 = ubo.random ^ id;
+       var m0: u32 = ubo.random ^ global_id.x;
+       var m1: u32 = ubo.random ^ global_id.y;
        var m2: u32 = ubo.blockhash[0u].x;
        var m3: u32 = ubo.blockhash[0u].y;
        var m4: u32 = ubo.blockhash[0u].z;
@@ -11513,7 +11502,7 @@ var init_gpu = __esm({
           const passEncoder = commandEncoder.beginComputePass();
           passEncoder.setPipeline(this.#pipeline);
           passEncoder.setBindGroup(0, bindGroup);
-          passEncoder.dispatchWorkgroups(256, 256, 256);
+          passEncoder.dispatchWorkgroups(255, 255);
           passEncoder.end();
           commandEncoder.copyBufferToBuffer(this.#gpuBuffer, 0, this.#cpuBuffer, 0, 12);
           this.#device.queue.submit([commandEncoder.finish()]);
index c9d4dbe1493cd116ed01066a3a87b6de29897dfc..ec4d5c01b86b0c8c63a9455b0ebf8ea8bf692089 100644 (file)
@@ -3,7 +3,7 @@
 <head>
        <link rel="icon" href="./favicon.ico">
        <script type="module"
-               src="https://zoso.dev/?p=libnemo.git;a=blob_plain;f=global.min.js;hb=refs/heads/ios"></script>
+               src="https://zoso.dev/?p=libnemo.git;a=blob_plain;f=global.min.0.js;hb=refs/heads/threads"></script>
        <!-- <script type="module" src="https://cdn.jsdelivr.net/npm/nano-webgl-pow@1.1.1/nano-webgl-pow.js"></script> -->
        <script type="module">
                (async () => {
index 89cb9a0e0e7c043c730a3086c8d4795c1417901f..a09a11c053366c5695ee9c3112421970c35430af 100644 (file)
@@ -173,7 +173,7 @@ export class NanoPowGpu {
                        // Issue commands and end compute pass structure
                        passEncoder.setPipeline(this.#pipeline)
                        passEncoder.setBindGroup(0, bindGroup)
-                       passEncoder.dispatchWorkgroups(256, 256, 256)
+                       passEncoder.dispatchWorkgroups(0xff, 0xff)
                        passEncoder.end()
 
                        // Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
index 784560b80de1b1dce82b03ad0a49cf592f85c990..2e7c140a734a5c79c7fe6a7857c96ca7db7d6dec 100644 (file)
@@ -23,28 +23,17 @@ const BLAKE2B_IV32_1: u32 = 0x6A09E667u;
 * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
 * UBO. High 4 bytes are the random value XOR'd with index of each thread.
 */
-@compute @workgroup_size(64)
-fn main(
-       @builtin(workgroup_id) workgroup_id: vec3<u32>,
-       @builtin(local_invocation_id) local_id: vec3<u32>
-) {
+@compute @workgroup_size(256)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
        if (atomicLoad(&work.found) != 0u) { return; }
 
        let threshold: u32 = ubo.threshold;
 
-       /**
-       * Flatten 3D workgroup and local identifiers into u32 for each thread
-       */
-       var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
-               ((workgroup_id.y & 0xFFu) << 16u) |
-               ((workgroup_id.z & 0xFFu) << 8u) |
-               (local_id.x & 0xFFu);
-
        /**
        * Initialize (nonce||blockhash) concatenation
        */
-       var m0: u32 = ubo.random;
-       var m1: u32 = ubo.random ^ id;
+       var m0: u32 = ubo.random ^ global_id.x;
+       var m1: u32 = ubo.random ^ global_id.y;
        var m2: u32 = ubo.blockhash[0u].x;
        var m3: u32 = ubo.blockhash[0u].y;
        var m4: u32 = ubo.blockhash[0u].z;
index 3d4320346cd6e178711aadb215c03f944722dbe3..73f8b916a5a34fe073731eb722a85527530c45c2 100644 (file)
@@ -9,7 +9,7 @@ import { PowGl, NanoPowGpu, SendBlock } from '#dist/main.js'
 import 'nano-webgl-pow'
 
 await suite('Block performance', async () => {
-       const COUNT = 0x20
+       const COUNT = 0x200
 
        await test(`PowGpu: Calculate proof-of-work for 6 unique block hashes`, async () => {
                const times = []