Minimum: 20.80000001192093 ms
Maximum: 2093.199999988079 ms
+libnemo: Time to calculate proof-of-work for a send block 512 times
+(after increasing workgroup_size to 256 from 64 and decreasing dispatch size)
+Total: 149857.0999999717 ms
+Average: 292.6896484374447 ms
+Harmonic: 73.49751645489904 ms
+Geometric: 174.9560632035056 ms
+Minimum: 2.600000001490116 ms
+Maximum: 2364.5999999940395 ms
+
+GLOBALS.mjs:46 PASS libnemo: Time to calculate proof-of-work for a send block 512 times
+(after increasing workgroup_size to 256 from 64 and decreasing dispatch size)
+Total: 145201.70000004023 ms
+Average: 283.5970703125786 ms
+Harmonic: 54.2894989554052 ms
+Geometric: 155.58659283933008 ms
+Minimum: 2.5 ms
+Maximum: 1812.3999999985099 ms
+
PowGpu: Time to calculate proof-of-work for a send block 32 times
* 8-byte work is split into two 4-byte u32, each the random u32 from the UBO
* XOR'd with a thread index: low 4 bytes use global_id.x, high use global_id.y.
*/
-@compute @workgroup_size(64)
-fn main(
- @builtin(workgroup_id) workgroup_id: vec3<u32>,
- @builtin(local_invocation_id) local_id: vec3<u32>
-) {
+@compute @workgroup_size(256)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
if (atomicLoad(&work.found) != 0u) { return; }
let threshold: u32 = ubo.threshold;
- /**
- * Flatten 3D workgroup and local identifiers into u32 for each thread
- */
- var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
- ((workgroup_id.y & 0xFFu) << 16u) |
- ((workgroup_id.z & 0xFFu) << 8u) |
- (local_id.x & 0xFFu);
-
/**
* Initialize (nonce||blockhash) concatenation
*/
- var m0: u32 = ubo.random;
- var m1: u32 = ubo.random ^ id;
+ var m0: u32 = ubo.random ^ global_id.x;
+ var m1: u32 = ubo.random ^ global_id.y;
var m2: u32 = ubo.blockhash[0u].x;
var m3: u32 = ubo.blockhash[0u].y;
var m4: u32 = ubo.blockhash[0u].z;
const passEncoder = commandEncoder.beginComputePass();
passEncoder.setPipeline(this.#pipeline);
passEncoder.setBindGroup(0, bindGroup);
- passEncoder.dispatchWorkgroups(256, 256, 256);
+ passEncoder.dispatchWorkgroups(255, 255);
passEncoder.end();
commandEncoder.copyBufferToBuffer(this.#gpuBuffer, 0, this.#cpuBuffer, 0, 12);
this.#device.queue.submit([commandEncoder.finish()]);
<head>
<link rel="icon" href="./favicon.ico">
<script type="module"
- src="https://zoso.dev/?p=libnemo.git;a=blob_plain;f=global.min.js;hb=refs/heads/ios"></script>
+ src="https://zoso.dev/?p=libnemo.git;a=blob_plain;f=global.min.0.js;hb=refs/heads/threads"></script>
<!-- <script type="module" src="https://cdn.jsdelivr.net/npm/nano-webgl-pow@1.1.1/nano-webgl-pow.js"></script> -->
<script type="module">
(async () => {
// Issue commands and end compute pass structure
passEncoder.setPipeline(this.#pipeline)
passEncoder.setBindGroup(0, bindGroup)
- passEncoder.dispatchWorkgroups(256, 256, 256)
+ passEncoder.dispatchWorkgroups(0xff, 0xff)
passEncoder.end()
// Copy 8-byte nonce and 4-byte found flag from GPU to CPU for reading
* 8-byte work is split into two 4-byte u32, each the random u32 from the UBO
* XOR'd with a thread index: low 4 bytes use global_id.x, high use global_id.y.
*/
-@compute @workgroup_size(64)
-fn main(
- @builtin(workgroup_id) workgroup_id: vec3<u32>,
- @builtin(local_invocation_id) local_id: vec3<u32>
-) {
+@compute @workgroup_size(256)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
if (atomicLoad(&work.found) != 0u) { return; }
let threshold: u32 = ubo.threshold;
- /**
- * Flatten 3D workgroup and local identifiers into u32 for each thread
- */
- var id: u32 = ((workgroup_id.x & 0xFFu) << 24u) |
- ((workgroup_id.y & 0xFFu) << 16u) |
- ((workgroup_id.z & 0xFFu) << 8u) |
- (local_id.x & 0xFFu);
-
/**
* Initialize (nonce||blockhash) concatenation
*/
- var m0: u32 = ubo.random;
- var m1: u32 = ubo.random ^ id;
+ var m0: u32 = ubo.random ^ global_id.x;
+ var m1: u32 = ubo.random ^ global_id.y;
var m2: u32 = ubo.blockhash[0u].x;
var m3: u32 = ubo.blockhash[0u].y;
var m4: u32 = ubo.blockhash[0u].z;
import 'nano-webgl-pow'
await suite('Block performance', async () => {
- const COUNT = 0x20
+ const COUNT = 0x200
await test(`PowGpu: Calculate proof-of-work for 6 unique block hashes`, async () => {
const times = []