*/
// Compute entry point, 256 invocations per workgroup. Each invocation fills a
// 16-word message `m` from a counter, its global invocation id and
// `ubo.blockhash`, mixes it into `v` with twelve rounds of B2B_G, and writes
// the counter and id into `work` when the threshold check passes.
@compute @workgroup_size(256)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
- var m: array<u32, 16>;
-
- m[0u] = ubo.rand.x;
- m[1u] = (ubo.rand.y << 16u) ^ global_id.x;
// Clear the found flag (work.x) and both result words before searching.
// NOTE(review): `work` is declared outside this view; if it is shared between
// invocations, these writes and the loop condition below are unsynchronized -
// confirm.
+ work.x = 0u;
+ work.y = 0u;
+ work.z = 0u;
+ var m: array<u32, 16>;
// m[0] is the per-invocation counter (overwritten at the top of every loop
// pass); m[1] is this invocation's x index.
// NOTE(review): only global_id.x is used, so invocations that differ only in
// y or z would start from the same m[1] - confirm against the dispatch size.
+ m[0u] = 0u;
+ m[1u] = global_id.x;
m[2u] = ubo.blockhash[0u].x;
m[3u] = ubo.blockhash[0u].y;
m[4u] = ubo.blockhash[0u].z;
m[8u] = ubo.blockhash[1u].z;
m[9u] = ubo.blockhash[1u].w;
- /**
- * Compression buffer, intialized to 2 instances of the initialization vector
- * The following values have been modified from the BLAKE2B_IV:
- * OUTLEN is constant 8 bytes
- * v[0] ^= 0x01010000u ^ uint(OUTLEN);
- * INLEN is constant 40 bytes: work value (8) + block hash (32)
- * v[24] ^= uint(INLEN);
- * It's always the "last" compression at this INLEN
- * v[28] = ~v[28];
- * v[29] = ~v[29];
- */
- var v = array<u32, 32>(
- 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
- 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
- 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
- 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
- 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
- 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
- 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
- 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
- );
-
- // twelve rounds of mixing
- for (var i: u32 = 0u; i < 12u; i = i + 1u) {
- B2B_G(&v, &m, 0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
- B2B_G(&v, &m, 2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
- B2B_G(&v, &m, 4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
- B2B_G(&v, &m, 6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
- B2B_G(&v, &m, 0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
- B2B_G(&v, &m, 2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
- B2B_G(&v, &m, 4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
- B2B_G(&v, &m, 6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
- }
// Keep trying successive counter values until work.x becomes non-zero.
+ var i: u32 = 0u;
+ while (work.x == 0u) {
// Use the current counter as m[0], then advance it for the next pass.
+ m[0u] = i;
+ i = i + 1u;
+
+ /**
+ * Compression buffer, initialized to 2 instances of the initialization vector
+ * The following values have been modified from the BLAKE2B_IV:
+ * OUTLEN is constant 8 bytes
+ * v[0] ^= 0x01010000u ^ uint(OUTLEN);
+ * INLEN is constant 40 bytes: work value (8) + block hash (32)
+ * v[24] ^= uint(INLEN);
+ * It's always the "last" compression at this INLEN
+ * v[28] = ~v[28];
+ * v[29] = ~v[29];
+ */
+ var v = array<u32, 32>(
+ 0xF2BDC900u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+ 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+ 0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+ 0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u,
+ 0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u,
+ 0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au,
+ 0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu,
+ 0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u
+ );
+
+ // twelve rounds of mixing
// The loop below declares its own `i`, which shadows the outer counter for the
// duration of the rounds; the outer counter is not modified here.
+ for (var i: u32 = 0u; i < 12u; i = i + 1u) {
+ B2B_G(&v, &m, 0, 8, 16, 24, SIGMA82[i * 16 + 0], SIGMA82[i * 16 + 1]);
+ B2B_G(&v, &m, 2, 10, 18, 26, SIGMA82[i * 16 + 2], SIGMA82[i * 16 + 3]);
+ B2B_G(&v, &m, 4, 12, 20, 28, SIGMA82[i * 16 + 4], SIGMA82[i * 16 + 5]);
+ B2B_G(&v, &m, 6, 14, 22, 30, SIGMA82[i * 16 + 6], SIGMA82[i * 16 + 7]);
+ B2B_G(&v, &m, 0, 10, 20, 30, SIGMA82[i * 16 + 8], SIGMA82[i * 16 + 9]);
+ B2B_G(&v, &m, 2, 12, 22, 24, SIGMA82[i * 16 + 10], SIGMA82[i * 16 + 11]);
+ B2B_G(&v, &m, 4, 14, 16, 26, SIGMA82[i * 16 + 12], SIGMA82[i * 16 + 13]);
+ B2B_G(&v, &m, 6, 8, 18, 28, SIGMA82[i * 16 + 14], SIGMA82[i * 16 + 15]);
+ }
- // Store the result directly into work array
- if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > 0xffffffffu) {
- work.x = ubo.rand.x;
- work.y = (ubo.rand.y << 16u) ^ global_id.x;
+ // Store the result directly into work array
// On success: set the found flag (which also ends the while loop), then
// publish the counter word m[0] and the invocation id word m[1].
+ if ((BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
+ work.x = 1u;
+ work.y = m[0u];
+ work.z = m[1u];
+ }
}
}
`;
// Issue commands and end compute pass
passEncoder.setPipeline(PowGpu.#pipeline)
passEncoder.setBindGroup(0, bindGroup)
- // passEncoder.dispatchWorkgroups(256 * Math.max(1, navigator.hardwareConcurrency))
- passEncoder.dispatchWorkgroups(256)
+ passEncoder.dispatchWorkgroups(256, 256, 256)
passEncoder.end()
// Copy result from GPU buffer to CPU buffer
const result = new Uint32Array(PowGpu.#cpuBuffer.getMappedRange()).slice()
PowGpu.#cpuBuffer.unmap()
- console.log(`result: ${[...result]}`)
- if (result[0] !== 0 || result[1] !== 0) {
- const hex = PowGpu.#hexify([result[0], result[1]])
+ if (result[0] !== 0) {
+ const hex = PowGpu.#hexify([result[1], result[2]])
typeof callback === 'function' && callback(hex)
return
}