static #shader = `
struct UBO {
blockhash: array<vec4<u32>, 2>,
+ random: u32,
threshold: u32
};
@group(0) @binding(0) var<uniform> ubo: UBO;
/**
* Main compute function
- *
- * 8-byte work is split into two 4-byte u32
- * First 4 bytes will be iterated by shader
- * Last 4 bytes are defined by index of each thread
+ * 8-byte work is split into two 4-byte u32. Low 4 bytes are random u32 from
+ * UBO. High 4 bytes are the random value XOR'd with index of each thread.
*/
@compute @workgroup_size(256)
fn main(
((workgroup_id.z & 0xff) << 8) |
(local_id.x & 0xff);
var m: array<u32, 16>;
- m[0u] = id;
- m[1u] = reverseBits(id);
+ m[0u] = ubo.random;
+ m[1u] = id ^ ubo.random;
m[2u] = ubo.blockhash[0u].x;
m[3u] = ubo.blockhash[0u].y;
m[4u] = ubo.blockhash[0u].z;
);
/**
- * Iterate and hash until nonce found
+ * Twelve rounds of mixing as part of compression step
*/
- for (var j: u32 = 0u; j < 0x1u; j = j + 1u) {
- m[0u] = m[0u] ^ j;
-
- // twelve rounds of mixing
- for (var i: u32 = 0u; i < 12u; i = i + 1u) {
- G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u]);
- G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u]);
- G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u]);
- G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u]);
- G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u]);
- G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u]);
- G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u]);
- G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u]);
- }
+ for (var i: u32 = 0u; i < 12u; i = i + 1u) {
+ G(&v, &m, 0u, 8u, 16u, 24u, SIGMA82[i * 16u + 0u], SIGMA82[i * 16u + 1u]);
+ G(&v, &m, 2u, 10u, 18u, 26u, SIGMA82[i * 16u + 2u], SIGMA82[i * 16u + 3u]);
+ G(&v, &m, 4u, 12u, 20u, 28u, SIGMA82[i * 16u + 4u], SIGMA82[i * 16u + 5u]);
+ G(&v, &m, 6u, 14u, 22u, 30u, SIGMA82[i * 16u + 6u], SIGMA82[i * 16u + 7u]);
+ G(&v, &m, 0u, 10u, 20u, 30u, SIGMA82[i * 16u + 8u], SIGMA82[i * 16u + 9u]);
+ G(&v, &m, 2u, 12u, 22u, 24u, SIGMA82[i * 16u + 10u], SIGMA82[i * 16u + 11u]);
+ G(&v, &m, 4u, 14u, 16u, 26u, SIGMA82[i * 16u + 12u], SIGMA82[i * 16u + 13u]);
+ G(&v, &m, 6u, 8u, 18u, 28u, SIGMA82[i * 16u + 14u], SIGMA82[i * 16u + 15u]);
+ }
- // Store the result directly into work array
- if (atomicLoad(&work.found) == 0u && (BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
- atomicStore(&work.found, 1u);
- work.nonce.x = m[0];
- work.nonce.y = m[1];
- return;
- }
+ /**
+ * Set nonce if it passes the threshold and no other thread has set it
+ */
+ if (atomicLoad(&work.found) == 0u && (BLAKE2B_IV32_1 ^ v[1u] ^ v[17u]) > ubo.threshold) {
+ atomicStore(&work.found, 1u);
+ work.nonce.x = m[0];
+ work.nonce.y = m[1];
+ return;
}
return;
}
// Set up uniform buffer object
- const uboView = new DataView(new ArrayBuffer(48))
+ const uboView = new DataView(new ArrayBuffer(64))
for (let i = 0; i < 64; i += 8) {
const uint32 = hashHex.slice(i, i + 8)
uboView.setUint32(i / 2, parseInt(uint32, 16))
}
- uboView.setUint32(32, threshold, true)
+ const random = crypto.getRandomValues(new Uint32Array(1))[0]
+ console.log(`random: ${random}`)
+ uboView.setUint32(32, random, true)
+ uboView.setUint32(48, threshold, true)
const uboBuffer = PowGpu.#device.createBuffer({
size: uboView.byteLength,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,