From 972f59f5fb51a9a97696bf4a44224115b39d27d9 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 22 Apr 2025 14:52:14 -0700 Subject: [PATCH] In CLI, capture logging from spawned server process. In compute shader, fix difficulty comparison of low bits, reduce workgroup size to 64, and update doc comments. In GPU, fix bug in load loop that checks for the GPU device, share bind group as static variable between pipelines, dispatch once on setup to compile and cache shader prior to actual work calls, save compute results in static variable to reduce memory footprint, lower timeout when tool is busy, allow difficulty of zero, eliminate a Math.random() call by implementing a sliding randomness value for the search seed, and throw an error if work to validate does not match nonce returned by compute result. In launch script, add max HTTP header size limit to protect server process. In test script, check more generate calls. In package, reduce run size of benchmark. In server, replace temp file for puppeteer page with intercepted HTTPS request to gain secure context for WebGPU, send logs over IPC to CLI, implement more constants for attack vector protection, add rate limiter using tokens per IP address, tweak error messaging, cache NanoPow library in puppeteer handle, split work calls by action for puppeteer evaluation, tweak puppeteer browser launch flags, and explicitly listen on localhost. --- package.json | 2 +- src/bin/cli.ts | 5 +- src/bin/nano-pow.sh | 2 +- src/bin/server.ts | 162 ++++++++++++++++++++--------- src/lib/gpu/compute.wgsl | 216 ++++++++------------------------------- src/lib/gpu/index.ts | 92 +++++++++-------- test/script.sh | 6 +- 7 files changed, 211 insertions(+), 274 deletions(-) diff --git a/package.json b/package.json index 5fb2bbf..0ebf913 100644 --- a/package.json +++ b/package.json @@ -45,7 +45,7 @@ "url": "git+https://zoso.dev/nano-pow.git" }, "scripts": { - "benchmark": "npm run build && ./dist/bin/nano-pow.sh --benchmark 1000", + "benchmark": "npm run build && ./dist/bin/nano-pow.sh --benchmark 100", "build": "rm -rf {dist,types} && tsc && node esbuild.mjs && cp -p src/bin/nano-pow.sh dist/bin", "prepare": "npm run build", "start": "./dist/bin/nano-pow.sh --server", diff --git a/src/bin/cli.ts b/src/bin/cli.ts index bf8720b..baac060 100755 --- a/src/bin/cli.ts +++ b/src/bin/cli.ts @@ -137,7 +137,10 @@ for (const stdinErr of stdinErrors) { log('Starting NanoPow CLI') const server = spawn(process.execPath, [new URL(import.meta.resolve('./server.js')).pathname], { stdio: ['pipe', 'pipe', 'pipe', 'ipc'] }) const port = await new Promise((resolve, reject): void => { - server.on('message', (msg: { type: string, port: number }): void => { + server.on('message', (msg: { type: string, port: number, text: string }): void => { + if (msg.type === 'console') { + log(msg.text) + } if (msg.type === 'listening') { if (msg.port != null) { log(`Server listening on port ${msg.port}`) diff --git a/src/bin/nano-pow.sh b/src/bin/nano-pow.sh index cc56e44..b9f930d 100755 --- a/src/bin/nano-pow.sh +++ b/src/bin/nano-pow.sh @@ -10,7 +10,7 @@ NANO_POW_LOGS="$NANO_POW_HOME"/logs; mkdir -p "$NANO_POW_LOGS"; if [ "$1" = '--server' ]; then shift; - node "$SCRIPT_DIR"/server.js >> "$NANO_POW_LOGS"/nano-pow-server-$(date -I).log 2>&1 & echo "$!" > "$NANO_POW_HOME"/server.pid; + node "$SCRIPT_DIR"/server.js --max-http-header-size=1024 >> "$NANO_POW_LOGS"/nano-pow-server-$(date -I).log 2>&1 & echo "$!" > "$NANO_POW_HOME"/server.pid; sleep 0.1; if [ "$(ps | grep $(cat $NANO_POW_HOME/server.pid))" = '' ]; then cat $(ls -td "$NANO_POW_LOGS"/* | head -n1); diff --git a/src/bin/server.ts b/src/bin/server.ts index 7b571ee..298d777 100755 --- a/src/bin/server.ts +++ b/src/bin/server.ts @@ -4,9 +4,9 @@ import { launch } from 'puppeteer' import { subtle } from 'node:crypto' -import { readFile, unlink, writeFile } from 'node:fs/promises' +import { readFile } from 'node:fs/promises' import * as http from 'node:http' -import { AddressInfo } from 'node:net' +import { AddressInfo, Socket } from 'node:net' import { homedir } from 'node:os' import { join } from 'node:path' import type { NanoPowOptions, WorkGenerateRequest, WorkGenerateResponse, WorkValidateRequest, WorkValidateResponse } from '#types' @@ -17,19 +17,29 @@ import type { NanoPowOptions, WorkGenerateRequest, WorkGenerateResponse, WorkVal */ function log (...args: any[]): void { if (CONFIG.DEBUG) { - const d = new Date(Date.now()) - const opts: Intl.DateTimeFormatOptions = { - hour12: false, - dateStyle: 'medium', - timeStyle: 'medium' - } - console.log(d.toLocaleString(navigator.language, opts), `NanoPow[${process.pid}]:`, args) + const text = `${new Date(Date.now()).toLocaleString(Intl.DateTimeFormat().resolvedOptions().locale ?? 'en-US', { hour12: false, dateStyle: 'medium', timeStyle: 'medium' })} NanoPow[${process.pid}]: ${args}` + console.log(text) + process.send?.({ type: 'console', text }) } } process.title = 'NanoPow Server' +const MAX_BODY_SIZE = 256 +const MAX_CONNECTIONS = 1024 +const MAX_HEADER_COUNT = 32 +const MAX_IDLE_TIME = 5000 +const MAX_REQUEST_COUNT = 10 const MAX_REQUEST_SIZE = 1024 -const MAX_BODY_SIZE = 158 +const MAX_REQUEST_TIME = 60000 + +const requests: Map = new Map() +setInterval(() => { + for (const [i, t] of requests) { + if (t.time < Date.now() - MAX_REQUEST_TIME) { + requests.delete(i) + } + } +}, Math.max(MAX_REQUEST_TIME, 0)) const CONFIG = { DEBUG: false, @@ -77,42 +87,57 @@ process.on('SIGHUP', async (): Promise => { await loadConfig() }) -async function respond (res: http.ServerResponse, data: Buffer[]): Promise { +async function respond (res: http.ServerResponse, dataBuffer: Buffer[]): Promise { let statusCode: number = 500 let headers: http.OutgoingHttpHeaders = { 'Content-Type': 'application/json' } - let response: string = 'work_validate failed' + let response: string = 'request failed' try { - const datastring = Buffer.concat(data).toString().replace(/\s+/g, '') + const datastring = Buffer.concat(dataBuffer).toString() if (Buffer.byteLength(datastring) > MAX_BODY_SIZE) { - throw new Error('Invalid data.') + throw new Error('Data too large.') } - const { action, hash, work, difficulty }: WorkGenerateRequest | WorkValidateRequest = JSON.parse(datastring) + const data: WorkGenerateRequest | WorkValidateRequest = JSON.parse(datastring) + if (Object.getPrototypeOf(data) !== Object.prototype) { + throw new Error('Data corrupted.') + } + const { action, hash, work, difficulty } = data if (action !== 'work_generate' && action !== 'work_validate') { - throw new Error('Invalid action. Must be work_generate or work_validate.') + throw new Error('Action must be work_generate or work_validate.') } + response = `${action} failed` if (!/^[0-9A-Fa-f]{64}$/.test(hash ?? '')) { - throw new Error('Invalid hash. Must be a 64-character hex string.') + throw new Error('Hash must be a 64-character hex string.') } - if (difficulty && !/^[1-9A-Fa-f][0-9A-Fa-f]{0,15}$/.test(difficulty)) { - throw new Error('Invalid difficulty. Must be a hexadecimal string between 1-FFFFFFFFFFFFFFFF.') + if (difficulty && !/^[0-9A-Fa-f]{0,16}$/.test(difficulty)) { + throw new Error('Difficulty must be a hex string between 0-FFFFFFFFFFFFFFFF.') } if (action === 'work_validate' && !/^[0-9A-Fa-f]{16}$/.test(work ?? '')) { - throw new Error('Invalid work. Must be a 16-character hex string.') + throw new Error('Work must be a 16-character hex string.') } - response = `${action} failed` const options: NanoPowOptions = { debug: CONFIG.DEBUG, effort: CONFIG.EFFORT, difficulty } - const args = [] - if (work) args.push(work) - args.push(hash) - args.push(options) - response = JSON.stringify(await page.evaluate(async (action: string, args: (string | NanoPowOptions)[]): Promise => { - if (window.NanoPow == null) throw new Error('NanoPow not found') - return await window.NanoPow[action](...args) - }, action, args)) + switch (action) { + case ('work_generate'): { + response = JSON.stringify(await page.evaluate(async (np, hash: string, options: NanoPowOptions): Promise => { + if (np == null) throw new Error('NanoPow not found') + return await np.work_generate(hash, options) + }, npHandle, hash, options)) + break + } + case ('work_validate'): { + response = JSON.stringify(await page.evaluate(async (np, work: string, hash: string, options: NanoPowOptions): Promise => { + if (np == null) throw new Error('NanoPow not found') + return await np.work_validate(work, hash, options) + }, npHandle, work, hash, options)) + break + } + default: { + throw new Error('Action must be work_generate or work_validate.') + } + } statusCode = 200 } catch (err) { log(err) @@ -124,14 +149,37 @@ async function respond (res: http.ServerResponse, data: Buffer[]): Promise // Create server const server = http.createServer((req, res): void => { + const xff = req.headers['x-forwarded-for'] + const ip = (typeof xff === 'string') + ? xff.split(',')[0].trim().replace(/^::ffff:/, '') + : req.socket.remoteAddress + if (ip == null) { + res.writeHead(401).end('Unauthorized') + return + } + const client = requests.get(ip) + if (ip === '127.0.0.1' || process.send != null || client == null || client.time < Date.now() - MAX_REQUEST_TIME) { + requests.set(ip, { tokens: MAX_REQUEST_COUNT, time: Date.now() }) + } else { + if (--client.tokens <= 0) { + log(`${ip} potential abuse`) + res.writeHead(429).end('Too Many Requests') + return + } + } let data: Buffer[] = [] let reqSize = 0 if (req.method === 'POST') { + const contentLength = +(req.headers['content-length'] ?? 0) + if (contentLength == 0 || contentLength > MAX_BODY_SIZE) { + res.writeHead(413).end('Content Too Large') + req.socket.destroy() + return + } req.on('data', (chunk: Buffer): void => { reqSize += chunk.byteLength if (reqSize > MAX_REQUEST_SIZE) { - res.writeHead(413, { 'Content-Type': 'text/plain' }) - res.end('Content Too Large') + res.writeHead(413).end('Content Too Large') req.socket.destroy() return } @@ -163,7 +211,16 @@ Full documentation: } }) -server.on('error', (e): void => { +server.headersTimeout = MAX_IDLE_TIME +server.keepAliveTimeout = MAX_IDLE_TIME +server.maxConnections = MAX_CONNECTIONS +server.maxHeadersCount = MAX_HEADER_COUNT + +server.on('connection', (c: Socket): void => { + c.setTimeout(MAX_IDLE_TIME, () => c.destroy()) +}) + +server.on('error', (e: Error): void => { log('Server error', e) try { shutdown() @@ -200,39 +257,44 @@ const browser = await launch({ headless: true, args: [ '--headless=new', - '--use-angle=vulkan', - '--enable-features=Vulkan', '--disable-vulkan-surface', + '--enable-features=Vulkan,DefaultANGLEVulkan,VulkanFromANGLE', + '--enable-gpu', '--enable-unsafe-webgpu' ] }) const page = await browser.newPage() -page.on('console', msg => log(msg.text())) -const path: string = new URL(import.meta.url).pathname -const dir = path.slice(0, path.lastIndexOf('/')) -const filename = join(dir, `${process.pid}.html`) -await writeFile(filename, '') -await page.goto(import.meta.resolve(filename)) -await page.waitForFunction(async (): Promise => { - return await navigator['gpu'].requestAdapter() -}) - -const src = `${NanoPow};window.NanoPow=NanoPow;` +const src = `${NanoPow};window.NanoPow=NanoPowGpu;` const hash = await subtle.digest('SHA-256', Buffer.from(src)) const enc = `sha256-${Buffer.from(hash).toString('base64')}` - -await page.setContent(` +const body = ` -`) -await unlink(filename) +` + +await page.setRequestInterception(true) +page.on('request', async (req): Promise => { + if (req.isInterceptResolutionHandled()) return + if (req.url() === 'https://nanopow.invalid/') { + req.respond({ status: 200, contentType: 'text/html', body }) + } else { + req.continue() + } +}) +page.on('console', msg => log(msg.text())) +await page.goto('https://nanopow.invalid/') +await page.waitForFunction(async (): Promise => { + return window.NanoPow != null +}) +const npHandle = await page.evaluateHandle(() => window.NanoPow) + log('Puppeteer initialized') // Listen on configured port -server.listen(CONFIG.PORT, async (): Promise => { +server.listen(CONFIG.PORT, '127.0.0.1', async (): Promise => { const { port } = server.address() as AddressInfo CONFIG.PORT = port log(`Server listening on port ${port}`) diff --git a/src/lib/gpu/compute.wgsl b/src/lib/gpu/compute.wgsl index 9c46abe..e06df02 100644 --- a/src/lib/gpu/compute.wgsl +++ b/src/lib/gpu/compute.wgsl @@ -139,13 +139,12 @@ var m4: vec2; /** * Search compute function -* Calls main with a workgroup size of 96 which in testing was the lowest value -* that would saturate the GPU active thread count and warp occupancy which -* provides a decent balance with the power-sensitive requirements of mobile +* Calls main with a workgroup size of 64 which balances warps between NVIDIA and +* AMD cards while still considering the power-sensitive requirements of mobile * devices. The entire workgroup exits immediately if a nonce was already found * by a previous workgroup. */ -@compute @workgroup_size(96) +@compute @workgroup_size(64) fn search(@builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3) { if (local_id.x == 0u) { found = atomicLoad(&work.found) != 0u; @@ -248,316 +247,181 @@ fn main(id: vec3, validate: bool) { * due to the lack of both a native rotate function and 64-bit support in WGSL. */ - /**************************************************************************** - * ROUND(0) * - ****************************************************************************/ - /** - * r=0, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=0, m[sigma+1]=1 - * r=0, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=2, m[sigma+1]=3 - * r=0, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=4, m[sigma+1]=5 - * r=0, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=6, m[sigma+1]=7 + * ROUND(0) + * m[sigma]=(0,1),(2,3),(4,5),(6,7) + * m[sigma]=(8,9),(10,11),(12,13),(14,15) */ G(&v0, &v4, &v8, &vC, m0, m1); G(&v1, &v5, &v9, &vD, m2, m3); G(&v2, &v6, &vA, &vE, m4, Z); G(&v3, &v7, &vB, &vF, Z, Z); - /** - * r=0, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=8, m[sigma+1]=9 - * r=0, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=10, m[sigma+1]=11 - * r=0, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=12, m[sigma+1]=13 - * r=0, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=14, m[sigma+1]=15 - */ G(&v0, &v5, &vA, &vF, Z, Z); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, Z, Z); G(&v3, &v4, &v9, &vE, Z, Z); - /**************************************************************************** - * ROUND(1) * - ****************************************************************************/ - /** - * r=1, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=14, m[sigma+1]=10 - * r=1, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=4, m[sigma+1]=8 - * r=1, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=9, m[sigma+1]=15 - * r=1, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=13, m[sigma+1]=6 + * ROUND(1) + * m[sigma]=(14,10),(4,8),(9,15),(13,6) + * m[sigma]=(1,12),(0,2),(11,7),(5,3) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, m4, Z); G(&v2, &v6, &vA, &vE, Z, Z); G(&v3, &v7, &vB, &vF, Z, Z); - /** - * r=1, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=1, m[sigma+1]=12 - * r=1, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=0, m[sigma+1]=2 - * r=1, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=11, m[sigma+1]=7 - * r=1, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=5, m[sigma+1]=3 - */ G(&v0, &v5, &vA, &vF, m1, Z); G(&v1, &v6, &vB, &vC, m0, m2); G(&v2, &v7, &v8, &vD, Z, Z); G(&v3, &v4, &v9, &vE, Z, m3); - /**************************************************************************** - * ROUND(2) * - ****************************************************************************/ - /** - * r=2, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=11, m[sigma+1]=8 - * r=2, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=12, m[sigma+1]=0 - * r=2, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=5, m[sigma+1]=2 - * r=2, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=15, m[sigma+1]=13 + * ROUND(2) + * m[sigma]=(11,8),(12,0),(5,2),(15,13) + * m[sigma]=(10,14),(3,6),(7,1),(9,4) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, Z, m0); G(&v2, &v6, &vA, &vE, Z, m2); G(&v3, &v7, &vB, &vF, Z, Z); - /** - * r=2, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=10, m[sigma+1]=14 - * r=2, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=3, m[sigma+1]=6 - * r=2, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=7, m[sigma+1]=1 - * r=2, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=9, m[sigma+1]=4 - */ G(&v0, &v5, &vA, &vF, Z, Z); G(&v1, &v6, &vB, &vC, m3, Z); G(&v2, &v7, &v8, &vD, Z, m1); G(&v3, &v4, &v9, &vE, Z, m4); - /**************************************************************************** - * ROUND(3) * - ****************************************************************************/ - /** - * r=3, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=7, m[sigma+1]=9 - * r=3, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=3, m[sigma+1]=1 - * r=3, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=13, m[sigma+1]=12 - * r=3, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=11, m[sigma+1]=14 + * ROUND(3) + * m[sigma]=(7,9),(3,1),(13,12),(11,14) + * m[sigma]=(2,6),(5,10),(4,0),(15,8) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, m3, m1); G(&v2, &v6, &vA, &vE, Z, Z); G(&v3, &v7, &vB, &vF, Z, Z); - /** - * r=3, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=2, m[sigma+1]=6 - * r=3, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=5, m[sigma+1]=10 - * r=3, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=4, m[sigma+1]=0 - * r=3, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=15, m[sigma+1]=8 - */ G(&v0, &v5, &vA, &vF, m2, Z); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, m4, m0); G(&v3, &v4, &v9, &vE, Z, Z); - /**************************************************************************** - * ROUND(4) * - ****************************************************************************/ - /** - * r=4, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=9, m[sigma+1]=0 - * r=4, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=5, m[sigma+1]=7 - * r=4, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=2, m[sigma+1]=4 - * r=4, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=10, m[sigma+1]=15 + * ROUND(4) + * m[sigma]=(9,0),(5,7),(2,4),(10,15) + * m[sigma]=(14,1),(11,12),(6,8),(3,13) */ G(&v0, &v4, &v8, &vC, Z, m0); G(&v1, &v5, &v9, &vD, Z, Z); G(&v2, &v6, &vA, &vE, m2, m4); G(&v3, &v7, &vB, &vF, Z, Z); - /** - */ - - /** - * r=4, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=14, m[sigma+1]=1 - * r=4, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=11, m[sigma+1]=12 - * r=4, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=6, m[sigma+1]=8 - * r=4, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=3, m[sigma+1]=13 - */ G(&v0, &v5, &vA, &vF, Z, m1); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, Z, Z); G(&v3, &v4, &v9, &vE, m3, Z); - /**************************************************************************** - * ROUND(5) * - ****************************************************************************/ - /** - * r=5, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=2, m[sigma+1]=12 - * r=5, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=6, m[sigma+1]=10 - * r=5, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=0, m[sigma+1]=11 - * r=5, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=8, m[sigma+1]=3 + * ROUND(5) + * m[sigma]=(2,12),(6,10),(0,11),(8,3) + * m[sigma]=(4,13),(7,5),(15,14),(1,9) */ G(&v0, &v4, &v8, &vC, m2, Z); G(&v1, &v5, &v9, &vD, Z, Z); G(&v2, &v6, &vA, &vE, m0, Z); G(&v3, &v7, &vB, &vF, Z, m3); - /** - * r=5, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=4, m[sigma+1]=13 - * r=5, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=7, m[sigma+1]=5 - * r=5, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=15, m[sigma+1]=14 - * r=5, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=1, m[sigma+1]=9 - */ G(&v0, &v5, &vA, &vF, m4, Z); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, Z, Z); G(&v3, &v4, &v9, &vE, m1, Z); - /**************************************************************************** - * ROUND(6) * - ****************************************************************************/ - /** - * r=6, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=12, m[sigma+1]=5 - * r=6, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=1, m[sigma+1]=15 - * r=6, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=14, m[sigma+1]=13 - * r=6, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=4, m[sigma+1]=10 + * ROUND(6) + * m[sigma]=(12,5),(1,15),(14,13),(4,10) + * m[sigma]=(0,7),(6,3),(9,2),(8,11) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, m1, Z); G(&v2, &v6, &vA, &vE, Z, Z); G(&v3, &v7, &vB, &vF, m4, Z); - /** - * r=6, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=0, m[sigma+1]=7 - * r=6, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=6, m[sigma+1]=3 - * r=6, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=9, m[sigma+1]=2 - * r=6, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=8, m[sigma+1]=11 - */ G(&v0, &v5, &vA, &vF, m0, Z); G(&v1, &v6, &vB, &vC, Z, m3); G(&v2, &v7, &v8, &vD, Z, m2); G(&v3, &v4, &v9, &vE, Z, Z); - /**************************************************************************** - * ROUND(7) * - ****************************************************************************/ - /** - * r=7, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=13, m[sigma+1]=11 - * r=7, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=7, m[sigma+1]=14 - * r=7, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=12, m[sigma+1]=1 - * r=7, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=3, m[sigma+1]=9 + * ROUND(7) + * m[sigma]=(13,11),(7,14),(12,1),(3,9) + * m[sigma]=(5,0),(15,4),(8,6),(2,10) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, Z, Z); G(&v2, &v6, &vA, &vE, Z, m1); G(&v3, &v7, &vB, &vF, m3, Z); - /** - * r=7, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=5, m[sigma+1]=0 - * r=7, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=15, m[sigma+1]=4 - * r=7, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=8, m[sigma+1]=6 - * r=7, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=2, m[sigma+1]=10 - */ G(&v0, &v5, &vA, &vF, Z, m0); G(&v1, &v6, &vB, &vC, Z, m4); G(&v2, &v7, &v8, &vD, Z, Z); G(&v3, &v4, &v9, &vE, m2, Z); - /**************************************************************************** - * ROUND(8) * - ****************************************************************************/ - /** - * r=8, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=6, m[sigma+1]=15 - * r=8, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=14, m[sigma+1]=9 - * r=8, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=11, m[sigma+1]=3 - * r=8, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=0, m[sigma+1]=8 + * ROUND(8) + * m[sigma]=(6,15),(14,9),(11,3),(0,8) + * m[sigma]=(12,2),(13,7),(1,4),(10,5) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, Z, Z); G(&v2, &v6, &vA, &vE, Z, m3); G(&v3, &v7, &vB, &vF, m0, Z); - /** - * r=8, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=12, m[sigma+1]=2 - * r=8, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=13, m[sigma+1]=7 - * r=8, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=1, m[sigma+1]=4 - * r=8, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=10, m[sigma+1]=5 - */ G(&v0, &v5, &vA, &vF, Z, m2); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, m1, m4); G(&v3, &v4, &v9, &vE, Z, Z); - /**************************************************************************** - * ROUND(9) * - ****************************************************************************/ - /** - * r=9, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=10, m[sigma+1]=2 - * r=9, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=8, m[sigma+1]=4 - * r=9, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=7, m[sigma+1]=6 - * r=9, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=1, m[sigma+1]=5 + * ROUND(9) + * m[sigma]=(10,2),(8,4),(7,6),(1,5) + * m[sigma]=(15,11),(9,14),(3,12),(13,0) */ G(&v0, &v4, &v8, &vC, Z, m2); G(&v1, &v5, &v9, &vD, Z, m4); G(&v2, &v6, &vA, &vE, Z, Z); G(&v3, &v7, &vB, &vF, m1, Z); - /** - * r=9, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=15, m[sigma+1]=11 - * r=9, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=9, m[sigma+1]=14 - * r=9, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=3, m[sigma+1]=12 - * r=9, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=13, m[sigma+1]=0 - */ G(&v0, &v5, &vA, &vF, Z, Z); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, m3, Z); G(&v3, &v4, &v9, &vE, Z, m0); - /**************************************************************************** - * ROUND(10) * - ****************************************************************************/ - /** - * r=10, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=0, m[sigma+1]=1 - * r=10, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=2, m[sigma+1]=3 - * r=10, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=4, m[sigma+1]=5 - * r=10, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=6, m[sigma+1]=7 + * ROUND(10) + * m[sigma]=(0,1),(2,3),(4,5),(6,7) + * m[sigma]=(8,9),(10,11),(12,13),(14,15) */ G(&v0, &v4, &v8, &vC, m0, m1); G(&v1, &v5, &v9, &vD, m2, m3); G(&v2, &v6, &vA, &vE, m4, Z); G(&v3, &v7, &vB, &vF, Z, Z); - /** - * r=10, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=8, m[sigma+1]=9 - * r=10, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=10, m[sigma+1]=11 - * r=10, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=12, m[sigma+1]=13 - * r=10, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=14, m[sigma+1]=15 - */ G(&v0, &v5, &vA, &vF, Z, Z); G(&v1, &v6, &vB, &vC, Z, Z); G(&v2, &v7, &v8, &vD, Z, Z); G(&v3, &v4, &v9, &vE, Z, Z); - /**************************************************************************** - * ROUND(11) * - ****************************************************************************/ - /** - * r=11, i=0, a=v[0], b=v[4], c=v[8], d=v[12], m[sigma]=14, m[sigma+1]=10 - * r=11, i=1, a=v[1], b=v[5], c=v[9], d=v[13], m[sigma]=4, m[sigma+1]=8 - * r=11, i=2, a=v[2], b=v[6], c=v[10], d=v[14], m[sigma]=9, m[sigma+1]=15 - * r=11, i=3, a=v[3], b=v[7], c=v[11], d=v[15], m[sigma]=13, m[sigma+1]=6 + * ROUND(11) + * m[sigma]=(14,10),(4,8),(9,15),(13,6) + * m[sigma]=(1,12),(0,2),(11,7),(5,3) */ G(&v0, &v4, &v8, &vC, Z, Z); G(&v1, &v5, &v9, &vD, m4, Z); G(&v2, &v6, &vA, &vE, Z, Z); G(&v3, &v7, &vB, &vF, Z, Z); - /** - * r=11, i=4, a=v[0], b=v[5], c=v[10], d=v[15], m[sigma]=1, m[sigma+1]=12 - * r=11, i=5, a=v[1], b=v[6], c=v[11], d=v[12], m[sigma]=0, m[sigma+1]=2 - * r=11, i=6, a=v[2], b=v[7], c=v[8], d=v[13], m[sigma]=11, m[sigma+1]=7 - * r=11, i=7, a=v[3], b=v[4], c=v[9], d=v[14], m[sigma]=5, m[sigma+1]=3 - */ G(&v0, &v5, &vA, &vF, m1, Z); // G(&v1, &v6, &vB, &vC, m0, m2); G(&v2, &v7, &v8, &vD, Z, Z); @@ -571,7 +435,7 @@ fn main(id: vec3, validate: bool) { * Set nonce if it passes the difficulty threshold and no other thread has set it. */ var result: vec2 = BLAKE2B_INIT[0u] ^ v0 ^ v8; - if (validate || ((result.y > ubo.difficulty.y || (result.y == ubo.difficulty.y && result.y >= ubo.difficulty.y)) && atomicLoad(&work.found) == 0u)) { + if (validate || ((result.y > ubo.difficulty.y || (result.y == ubo.difficulty.y && result.x >= ubo.difficulty.x)) && atomicLoad(&work.found) == 0u)) { atomicStore(&work.found, 1u); work.nonce = m0; work.result = result; diff --git a/src/lib/gpu/index.ts b/src/lib/gpu/index.ts index 8f484aa..b9a8b13 100644 --- a/src/lib/gpu/index.ts +++ b/src/lib/gpu/index.ts @@ -21,7 +21,9 @@ export class NanoPowGpu { static #cpuBuffer: GPUBuffer static #uboBuffer: GPUBuffer static #uboView: DataView + static #resultView: DataView static #bindGroupLayout: GPUBindGroupLayout + static #bindGroup: GPUBindGroup static #searchPipeline: GPUComputePipeline static #validatePipeline: GPUComputePipeline @@ -38,7 +40,7 @@ export class NanoPowGpu { if (!(device instanceof GPUDevice)) throw new Error('WebGPU device failed to load.') device.lost?.then(this.reset) this.#device = device - this.setup() + await this.setup() } catch (err) { throw new Error('WebGPU initialization failed.', { cause: err }) } finally { @@ -47,7 +49,7 @@ export class NanoPowGpu { this.#isInitialized = true } - static setup (): void { + static async setup (): Promise { if (this.#device == null) throw new Error(`WebGPU device failed to load.`) // Create buffers for writing GPU calculations and reading from Javascript this.#gpuBuffer = this.#device.createBuffer({ @@ -91,6 +93,24 @@ export class NanoPowGpu { module: shaderModule } }) + // Bind UBO read and GPU write buffers + this.#bindGroup = this.#device.createBindGroup({ + layout: this.#bindGroupLayout, + entries: [ + { + binding: 0, + resource: { + buffer: this.#uboBuffer + }, + }, + { + binding: 1, + resource: { + buffer: this.#gpuBuffer + }, + }, + ], + }) // Create pipeline to connect compute shader to binding layout this.#validatePipeline = this.#device.createComputePipeline({ layout: this.#device.createPipelineLayout({ @@ -101,6 +121,11 @@ export class NanoPowGpu { module: shaderModule } }) + // Compile and cache shader prior to actual dispatch + const cmd = this.#device.createCommandEncoder() + cmd.beginComputePass().end() + this.#device.queue.submit([cmd.finish()]) + await this.#device.queue.onSubmittedWorkDone() console.log(`NanoPow WebGPU initialized. Recommended effort: ${Math.max(1, Math.floor(navigator.hardwareConcurrency / 2))}`) } @@ -146,7 +171,7 @@ export class NanoPowGpu { console.table(averages) } - static async #dispatch (pipeline: GPUComputePipeline, seed: bigint, hash: string, difficulty: bigint, passes: number): Promise { + static async #dispatch (pipeline: GPUComputePipeline, seed: bigint, hash: string, difficulty: bigint, passes: number): Promise { if (this.#device == null) throw new Error(`WebGPU device failed to load.`) // Set up uniform buffer object // Note: u32 size is 4, but total alignment must be multiple of 16 @@ -164,32 +189,13 @@ export class NanoPowGpu { this.#device.queue.writeBuffer(this.#gpuBuffer, 0, this.#bufferReset) this.#device.queue.writeBuffer(this.#cpuBuffer, 0, this.#bufferReset) - // Bind UBO read and GPU write buffers - const bindGroup = this.#device.createBindGroup({ - layout: this.#bindGroupLayout, - entries: [ - { - binding: 0, - resource: { - buffer: this.#uboBuffer - }, - }, - { - binding: 1, - resource: { - buffer: this.#gpuBuffer - }, - }, - ], - }) - // Create command encoder to issue commands to GPU and initiate computation const commandEncoder = this.#device.createCommandEncoder() const passEncoder = commandEncoder.beginComputePass() // Issue commands and end compute pass structure passEncoder.setPipeline(pipeline) - passEncoder.setBindGroup(0, bindGroup) + passEncoder.setBindGroup(0, this.#bindGroup) passEncoder.dispatchWorkgroups(passes, passes) passEncoder.end() @@ -200,20 +206,18 @@ export class NanoPowGpu { this.#device.queue.submit([commandEncoder.finish()]) // Read results back to Javascript and then unmap buffer after reading - let data = null try { await this.#cpuBuffer.mapAsync(GPUMapMode.READ) await this.#device.queue.onSubmittedWorkDone() - data = new DataView(this.#cpuBuffer.getMappedRange().slice(0)) + this.#resultView = new DataView(this.#cpuBuffer.getMappedRange().slice(0)) this.#cpuBuffer.unmap() } catch (err) { console.warn(`Error getting data from GPU. ${err}`) this.#cpuBuffer.unmap() this.reset() } - if (this.#debug) console.log('gpuBuffer data', data) - if (data == null) throw new Error(`Failed to get data from buffer.`) - return data + if (this.#debug) console.log('gpuBuffer data', this.#resultView) + if (this.#resultView == null) throw new Error(`Failed to get data from buffer.`) } /** @@ -230,7 +234,7 @@ export class NanoPowGpu { setTimeout(async (): Promise => { const result = this.work_generate(hash, options) resolve(result) - }, 500) + }, 100) }) } if (this.#isInitialized === false) this.init() @@ -243,7 +247,7 @@ export class NanoPowGpu { throw new TypeError(`Invalid difficulty ${options.difficulty}`) } } - const difficulty = (typeof options?.difficulty !== 'bigint' || options.difficulty < 1n || options.difficulty > 0xffffffffffffffffn) + const difficulty = (typeof options?.difficulty !== 'bigint' || options.difficulty < 0n || options.difficulty > 0xffffffffffffffffn) ? 0xfffffff800000000n : options.difficulty const effort = (typeof options?.effort !== 'number' || options.effort < 0x1 || options.effort > 0x20) @@ -256,7 +260,7 @@ export class NanoPowGpu { // Ensure WebGPU is initialized before calculating let loads = 0 - while (this.#device == null && loads < 20) { + while (this.#device == null && loads++ < 20) { await new Promise(resolve => { setTimeout(resolve, 500) }) @@ -270,16 +274,17 @@ export class NanoPowGpu { let start = performance.now() let nonce = 0n let result = 0n + let random = BigInt(Math.floor(Math.random() * 0xffffffff)) + let seed = random do { start = performance.now() - const random0 = Math.floor(Math.random() * 0xffffffff) - const random1 = Math.floor(Math.random() * 0xffffffff) - const seed = (BigInt(random0) << 32n) | BigInt(random1) + random = BigInt(Math.floor(Math.random() * 0xffffffff)) + seed = (seed & 0xffffffffn) << 32n | random if (this.#debug) console.log('seed', seed.toString(16).padStart(16, '0')) - const data = await this.#dispatch(this.#searchPipeline, seed, hash, difficulty, effort) - const found = !!data.getUint32(0) - nonce = data.getBigUint64(8, true) - result = data.getBigUint64(16, true) + await this.#dispatch(this.#searchPipeline, seed, hash, difficulty, effort) + const found = !!this.#resultView.getUint32(0) + nonce = this.#resultView.getBigUint64(8, true) + result = this.#resultView.getBigUint64(16, true) this.#busy = !found times.push(performance.now() - start) } while (this.#busy) @@ -309,7 +314,7 @@ export class NanoPowGpu { setTimeout(async (): Promise => { const result = this.work_validate(work, hash, options) resolve(result) - }, 500) + }, 100) }) } if (this.#isInitialized === false) this.init() @@ -322,7 +327,7 @@ export class NanoPowGpu { throw new TypeError(`Invalid difficulty ${options.difficulty}`) } } - const difficulty = (typeof options?.difficulty !== 'bigint' || options.difficulty < 1n || options.difficulty > 0xffffffffffffffffn) + const difficulty = (typeof options?.difficulty !== 'bigint' || options.difficulty < 0n || options.difficulty > 0xffffffffffffffffn) ? 0xfffffff800000000n : options.difficulty this.#debug = !!(options?.debug) @@ -347,10 +352,11 @@ export class NanoPowGpu { const seed = BigInt(`0x${work}`) if (this.#debug) console.log('work', work) - const data = await this.#dispatch(this.#validatePipeline, seed, hash, difficulty, 1) - nonce = data.getBigUint64(8, true) - result = data.getBigUint64(16, true) + await this.#dispatch(this.#validatePipeline, seed, hash, difficulty, 1) + nonce = this.#resultView.getBigUint64(8, true) + result = this.#resultView.getBigUint64(16, true) this.#busy = false + if (seed !== nonce) throw new Error('Result does not match work') if (this.#debug) console.log('nonce', nonce, nonce.toString(16).padStart(16, '0')) if (this.#debug) console.log('result', result, result.toString(16).padStart(16, '0')) const response: WorkValidateResponse = { diff --git a/test/script.sh b/test/script.sh index daa75aa..65bd8ce 100755 --- a/test/script.sh +++ b/test/script.sh @@ -24,21 +24,23 @@ curl -d '{ "action": "work_validate", "work": "47c83266398728cf", "hash": "92BA7 printf '\nValidate good hashes\n' curl -d '{ "action": "work_validate", "work": "47c83266398728cf", "hash": "92BA74A7D6DC7557F3EDA95ADC6341D51AC777A0A6FF0688A5C492AB2B2CB40D" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "4a8fb104eebbd336", "hash": "8797585D56B8AEA3A62899C31FC088F9BE849BA8298A88E94F6E3112D4E55D01" }' localhost:3001 -curl -d '{ "action": "work_validate", "work": "326f310d629a8a98", "hash": "204076E3364D16A018754FF67D418AB2FBEB38799FF9A29A1D5F9E34F16BEEEA", "difficulty": "ffffffff00000000" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "c5d5d6f7c5d6ccd1", "hash": "281E89AC73B1082B464B9C3C1168384F846D39F6DF25105F8B4A22915E999117" }' localhost:3001 +curl -d '{ "action": "work_validate", "work": "326f310d629a8a98", "hash": "204076E3364D16A018754FF67D418AB2FBEB38799FF9A29A1D5F9E34F16BEEEA", "difficulty": "ffffffff00000000" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "6866c1ac3831a891", "hash": "7069D9CD1E85D6204301D254B0927F06ACC794C9EA5DF70EA5578458FB597090", "difficulty": "fffffe0000000000" }' localhost:3001 printf '\nValidate bad hashes\n' curl -d '{ "action": "work_validate", "work": "0000000000000000", "hash": "0000000000000000000000000000000000000000000000000000000000000000" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "c5d5d6f7c5d6ccd1", "hash": "BA1E946BA3D778C2F30A83D44D2132CC6EEF010D8D06FF10A8ABD0100D8FB47E" }' localhost:3001 -curl -d '{ "action": "work_validate", "work": "ae238556213c3624", "hash": "BF41D87DA3057FDC6050D2B00C06531F89F4AA6195D7C6C2EAAF15B6E703F8F6", "difficulty": "ffffffff00000000" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "29a9ae0236990e2e", "hash": "32721F4BD2AFB6F6A08D41CD0DF3C0D9C0B5294F68D0D12422F52B28F0800B5F" }' localhost:3001 +curl -d '{ "action": "work_validate", "work": "ae238556213c3624", "hash": "BF41D87DA3057FDC6050D2B00C06531F89F4AA6195D7C6C2EAAF15B6E703F8F6", "difficulty": "ffffffff00000000" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "7d903b18d03f9820", "hash": "39C57C28F904DFE4012288FFF64CE80C0F42601023A9C82108E8F7B2D186C150", "difficulty": "fffffe0000000000" }' localhost:3001 curl -d '{ "action": "work_validate", "work": "e45835c3b291c3d1", "hash": "9DCD89E2B92FD59D7358C2C2E4C225DF94C88E187B27882F50FEFC3760D3994F", "difficulty": "ffffffff00000000" }' localhost:3001 printf '\nGenerate\n' curl -d '{ "action": "work_generate", "hash": "92BA74A7D6DC7557F3EDA95ADC6341D51AC777A0A6FF0688A5C492AB2B2CB40D" }' localhost:3001 & +curl -d '{ "action": "work_generate", "hash": "8797585D56B8AEA3A62899C31FC088F9BE849BA8298A88E94F6E3112D4E55D01" }' localhost:3001 & +curl -d '{ "action": "work_generate", "hash": "281E89AC73B1082B464B9C3C1168384F846D39F6DF25105F8B4A22915E999117" }' localhost:3001 & curl -d '{ "action": "work_generate", "hash": "204076E3364D16A018754FF67D418AB2FBEB38799FF9A29A1D5F9E34F16BEEEA", "difficulty": "ffffffff00000000" }' localhost:3001 & curl -d '{ "action": "work_generate", "hash": "7069D9CD1E85D6204301D254B0927F06ACC794C9EA5DF70EA5578458FB597090", "difficulty": "fffffe0000000000" }' localhost:3001 & wait -- 2.34.1