From 959e540670c6b5cf96f5c2f3496d3e2871d0c85c Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Thu, 16 Jan 2025 10:59:32 -0800 Subject: [PATCH] Log more benchmarks, and note that dispatch 0x800 and workgroup 8,8 was the best combination. --- benchmarks.md | 181 +++++++++++++++++++++++++++++++++++++++ src/shaders/compute.wgsl | 2 +- 2 files changed, 182 insertions(+), 1 deletion(-) diff --git a/benchmarks.md b/benchmarks.md index e9fa62e..8dd42d0 100644 --- a/benchmarks.md +++ b/benchmarks.md @@ -159,6 +159,187 @@ NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 32) "geometric": 178.59991348235997 } +NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 16,16) +{ + "count": 512, + "total": 171064.90000003576, + "rate": 3.6180268186231834, + "min": 81.3999999910593, + "max": 1453.3999999910593, + "arithmetic": 334.11113281256985, + "truncated": 276.39375000004657, + "harmonic": 217.83822531129073, + "geometric": 267.8978578209064 +} + +NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 16,16) +{ + "count": 512, + "total": 167172.10000005364, + "rate": 3.8445942901746104, + "min": 78.29999999701977, + "max": 3207.6000000089407, + "arithmetic": 326.5080078126048, + "truncated": 260.1054687501455, + "harmonic": 210.8258370543174, + "geometric": 257.37154756448814 +} + +NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 16,16) +{ + "count": 512, + "total": 161936.60000008345, + "rate": 3.740251239688252, + "min": 80.6000000089407, + "max": 1553.8999999910593, + "arithmetic": 316.282421875163, + "truncated": 267.3617187500349, + "harmonic": 204.59200263160372, + "geometric": 250.96434072996382 +} + +NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 16,16) +{ + "count": 512, + "total": 158915.09999985993, + "rate": 3.7711889999175563, + "min": 81, + "max": 2144.199999988079, + "arithmetic": 310.3810546872264, + "truncated": 265.1683593746857, + "harmonic": 201.944298796899, + "geometric": 246.41513206426896 +} + +NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 8,8) +{ + "count": 512, 
+ "total": 139888.0000000596, + "rate": 4.623788851791469, + "min": 24.100000008940697, + "max": 1754.5, + "arithmetic": 273.2187500001164, + "truncated": 216.27285156253492, + "harmonic": 124.13430182526332, + "geometric": 186.66857347407046 +} + +NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 8,8) +{ + "count": 512, + "total": 141471.0000000447, + "rate": 4.8109313380457674, + "min": 25.799999997019768, + "max": 1512.4000000059605, + "arithmetic": 276.3105468750873, + "truncated": 207.8599609376688, + "harmonic": 127.00893713470423, + "geometric": 192.06862657670237 +} + +NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 8,8) +{ + "count": 512, + "total": 133226.09999994934, + "rate": 4.905215336077563, + "min": 24.599999994039536, + "max": 1432.3999999910593, + "arithmetic": 260.20722656240105, + "truncated": 203.86464843756403, + "harmonic": 116.05589569169062, + "geometric": 178.05592700404114 +} + +NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 8,8) +{ + "count": 512, + "total": 146197.10000024736, + "rate": 4.400326586729279, + "min": 25.399999991059303, + "max": 1748.5, + "arithmetic": 285.5412109379831, + "truncated": 227.25585937549477, + "harmonic": 130.29367565620703, + "geometric": 196.68629173860154 +} + +NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 8,4) +{ + "count": 512, + "total": 146798.40000000596, + "rate": 4.501937943601046, + "min": 18.099999994039536, + "max": 1979.1000000089407, + "arithmetic": 286.71562500001164, + "truncated": 222.12656249990687, + "harmonic": 103.21952194085847, + "geometric": 176.78141792063872 +} + +NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 8,4) +{ + "count": 512, + "total": 138210.7000002265, + "rate": 4.76931689781462, + "min": 16.799999997019768, + "max": 1626.0999999940395, + "arithmetic": 269.9427734379424, + "truncated": 209.67363281274447, + "harmonic": 101.34635015953711, + "geometric": 172.06200959967907 +} + +NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 8,4) +{ + "count": 512, + "total": 
149949.19999992847, + "rate": 4.415311818463056, + "min": 16.799999997019768, + "max": 1790.7999999970198, + "arithmetic": 292.8695312498603, + "truncated": 226.48457031243015, + "harmonic": 112.43872189933657, + "geometric": 189.39141120585325 +} + +NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 8,4) +{ + "count": 512, + "total": 147114.90000009537, + "rate": 4.581196336470157, + "min": 17.600000008940697, + "max": 3584.5999999940395, + "arithmetic": 287.33378906268626, + "truncated": 218.28359375020955, + "harmonic": 118.69066922246898, + "geometric": 188.03357141542313 +} + +NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 4,4) +{ + "count": 512, + "total": 275257.4999998361, + "rate": 2.5033687912081977, + "min": 16.700000002980232, + "max": 4439.20000000298, + "arithmetic": 537.6123046871799, + "truncated": 399.461718749546, + "harmonic": 179.34949948078622, + "geometric": 340.32029726440055 +} + +NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 4,4) +{ + "count": 512, + "total": 265632.5000000298, + "rate": 2.4961071453998693, + "min": 19, + "max": 4817.0999999940395, + "arithmetic": 518.8134765625582, + "truncated": 400.6238281248952, + "harmonic": 152.2455042043822, + "geometric": 300.11657404770995 +} ----- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 2895232..79486d0 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -33,7 +33,7 @@ const ROTATE_31 = vec2(31u, 31u); * Search compute function * Calls main with a workgroup size of 64 which has been tested as optimal */ -@compute @workgroup_size(64) +@compute @workgroup_size(8,8) fn search(@builtin(global_invocation_id) global_id: vec3<u32>) { main(global_id); } -- 2.34.1