From 09a096418352ab448721ce7a888b383df6d5fb9d Mon Sep 17 00:00:00 2001
From: Chris Duncan <chris@zoso.dev>
Date: Sat, 15 Mar 2025 12:22:11 -0700
Subject: [PATCH] Updated cli to accept input from stdin like redirects and
 pipes. Add JSON output option to cli. Updated documentation for new cli
 features. Refactor gl draw shader to align with gpu compute shader on blake2b
 initialization. Update README license section. Rename file of test
 blockhashes since they are not work seeds.

---
 CHANGELOG.md                   | 158 +++++++++++++++++++++++++++++++++
 README.md                      |   3 +-
 cli.js                         |  71 ++++++++++++---
 nano-pow.1                     |   8 +-
 package.json                   |  14 +--
 src/shaders/gl-draw.ts         |  94 +++++++++++++-------
 test-seeds => test-blockhashes |   0
 7 files changed, 294 insertions(+), 54 deletions(-)
 create mode 100644 CHANGELOG.md
 rename test-seeds => test-blockhashes (100%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..30b31ba
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,158 @@
+<!--
+SPDX-FileCopyrightText: 2025 Chris Duncan <chris@zoso.dev>
+SPDX-License-Identifier: GPL-3.0-or-later
+-->
+
+## v3.1.2
+
+### Notable Changes
+
+Made minor changes to CLI inline help and manual.
+
+Added file of blockhashes used in `test.html`.
+
+
+
+## v3.1.1
+
+### Notable Changes
+
+Fixed minor issues with CLI inline help and manual.
+
+
+
+## v3.1.0
+
+### Notable Changes
+
+#### Use NanoPow from the command line
+
+A command line tool is now available to accommodate systems without a graphical
+user interface. Under the hood, it launches a headless Chrome browser using
+puppeteer to access the required WebGPU or WebGL APIs. Use the `--global` flag
+when installing to add the executable script and documentation manual to your
+system.
+
+```console
+npm i -g nano-pow
+```
+
+Use it from a command line shell to search for a work value using default
+settings or customizing behavior with options.
+
+```console
+nano-pow --effort 32 --threshold FFFFFFC0 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
+```
+
+Validate an existing work nonce against a blockhash.
+
+```console
+nano-pow --validate fedcba9876543210 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
+```
+
+Due to the overhead of launching puppeteer, it is recommended to process
+blockhashes in batches.
+
+```console
+nano-pow 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef [...]
+```
+
+or
+
+```console
+nano-pow $(cat /path/to/hashes/file)
+```
+
+Abbreviated documentation is available with `nano-pow --help`, and full
+documentation is available with `man nano-pow`.
+
+### Other Changes
+
+Fixed a bug in the compute shader where the final 64-bit addition missed a carry
+bit and resulted in a slightly different, though still valid, work value than
+expected, which only came to light when testing a custom max-value threshold
+(0xFFFFFFFF) well beyond Nano requirements.
+
+Fixed a bug in the final hash digest and subsequent threshold comparison which
+used the original blake2b_IV[0] value instead of the value modified by the
+parameter block.
+
+Fixed types.d.ts parameter definitions.
+
+Delayed canvas setup in WebGL implementation to allow for a cleaner
+initialization.
+
+Added more comprehensive documentation of BLAKE2b configuration and
+initialization in compute shader, for those who are interested in learning how
+it works.
+
+Add more logging when the debug option is used.
+
+Updated dependencies.
+
+
+
+## v3.0.1
+
+### Notable Changes
+
+Implemented `esbuild` legal comment retention.
+
+
+
+## v3.0.0
+
+### Notable Changes
+
+#### Search more values per frame with faster WebGL GPU-CPU transfers
+
+A WebGL downsampling fragment shader is now implemented which massively reduces
+the overhead of calling `readPixels()` which stalls the pipeline while it
+executes. This downsampling stage enables much larger canvases and therefore
+more nonces searched per frame without introducing lag due to synchronous data
+transfer from GPU to CPU. This change alone makes WebGL competitive with WebGPU
+on certain platforms, so even older browsers can get faster PoW.
+
+#### Execute shader statements in parallel
+
+Both WebGPU and WebGL shaders have been overhauled to use `vec4` data types to
+store and operate on two sets of 64-bit operands in parallel. This allows the
+compiler to optimize for vector operations; although not all hardware will
+benefit from the change, no regressions were observed.
+
+### Other Changes
+
+Simplify the WebGL pixel-coordinate-based nonce variation.
+
+Unroll WebGL main G mix function loop for improved performance.
+
+Add input to WebGL draw shader to better differentiate between search and
+validate processes.
+
+Simplify WebGL vertex shader now that it is only required for drawing the
+fullscreen quad and not for pixel coordinates.
+
+Maintain WebGL canvas between draw calls, unless level-of-effort has changed, to
+reduce overhead of executing a search.
+
+Attempt to handle WebGL context loss, with improved reset function, by
+reinitializing class.
+
+Reduce promise stack buildup when waiting for WebGL query result.
+
+Fix WebGL color buffer clearing by using correct API function.
+
+Improve nonce seed generation in both WebGL and WebGPU by switching from
+`crypto.getRandomValues()` to insecure `Math.random()` which is justified by the
+fact that NanoPow generates proof-of-work, not keys.
+
+Reduce garbage collection by reusing static variables.
+
+Add debug logging that obeys user-provided debug flag which is now stored as a
+static variable as well.
+
+Add TypeScript typings for new WebGL types.
+
+Fix minor issues with test page.
+
+Add benchmark results table.
diff --git a/README.md b/README.md
index 308f086..5a8664c 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,8 @@ Email: <bug-nano-pow@zoso.dev>
 implementation
 
 ## Licenses
-GPLv3 (or later) & MIT
+GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
+Portions of this code are also provided under the MIT License: <https://spdx.org/licenses/MIT.html>
 
 ## Donations
 If you find this package helpful, please consider tipping the developer.
diff --git a/cli.js b/cli.js
index 35219fa..f3dd9b7 100755
--- a/cli.js
+++ b/cli.js
@@ -2,11 +2,28 @@
 //! SPDX-FileCopyrightText: 2025 Chris Duncan <chris@zoso.dev>
 //! SPDX-License-Identifier: GPL-3.0-or-later
 
-import * as puppeteer from 'puppeteer'
 import * as fs from 'node:fs/promises'
+import * as readline from 'node:readline/promises'
+import * as puppeteer from 'puppeteer'
+
+const hashes = []
+
+const stdinErrors = []
+if (!process.stdin.isTTY) {
+	const stdin = readline.createInterface({
+		input: process.stdin
+	})
+	for await (const line of stdin) {
+		if (/^[0-9A-Fa-f]{64}$/.test(line)) {
+			hashes.push(line)
+		} else {
+			stdinErrors.push(`Skipping invalid stdin input: ${line}`)
+		}
+	}
+}
 
 const args = process.argv.slice(2)
-if (args.length === 0 || args.some(v => v === '--help' || v === '-h')) {
+if ((hashes.length === 0 && args.length === 0) || (args.some(v => v === '--help' || v === '-h'))) {
 	console.log(`Usage: nano-pow [OPTION]... BLOCKHASH...
 Generate work for BLOCKHASH, or multiple work values for BLOCKHASH(es)
 BLOCKHASH is a 64-character hexadecimal string. Multiple blockhashes must be separated by whitespace or line breaks.
@@ -14,6 +31,7 @@ Prints a 16-character hexadecimal work value to standard output. If using --vali
 
   -h, --help                  show this dialog
   -d, --debug                 enable additional logging output
+	-j, --json                  format output as JSON
   -e, --effort=<value>        increase demand on GPU processing
   -t, --threshold=<value>     override the minimum threshold value
   -v, --validate=<value>      check an existing work value instead of searching for one
@@ -28,18 +46,17 @@ Full documentation: <https://www.npmjs.com/package/nano-pow>
 	process.exit()
 }
 
-const hashes = []
+const inArgs = []
 while (/^[0-9A-Fa-f]{64}$/.test(args[args.length - 1] ?? '')) {
-	hashes.unshift(args.pop())
-}
-if (hashes.length === 0) {
-	console.error('Invalid block hash input')
-	process.exit(1)
+	inArgs.unshift(args.pop())
 }
+hashes.push(...inArgs)
 
 let fn = 'search'
 let work = ''
+let isJson = false
 const options = {}
+
 for (let i = 0; i < args.length; i++) {
 	switch (args[i]) {
 		case ('--validate'):
@@ -69,13 +86,27 @@ for (let i = 0; i < args.length; i++) {
 			options['debug'] = true
 			break
 		}
+		case ('--json'):
+		case ('-j'): {
+			isJson = true
+			break
+		}
+	}
+}
+
+if (options['debug']) {
+	console.log(`NanoPowCli.${fn}()`)
+	console.log(`${fn} options`, JSON.stringify(options))
+	for (const stdinErr of stdinErrors) {
+		console.warn(stdinErr)
 	}
 }
 
-if (options['debug']) console.log(`NanoPowCli.${fn}()`)
-if (options['debug']) console.log(`${fn} options`, JSON.stringify(options))
+if (hashes.length === 0) {
+	console.error('Invalid block hash input')
+	process.exit(1)
+}
 
-	;
 /**
 * Main
 */
@@ -102,10 +133,22 @@ if (options['debug']) console.log(`${fn} options`, JSON.stringify(options))
 		const output = msg.text().split(' ')
 		if (output[0] === 'cli') {
 			if (output[1] === 'exit') {
+				if (isJson) {
+					const results = await page.evaluate(() => {
+						return window.results
+					})
+					for (let i = 0; i < results.length; i++) {
+						results[i] = {
+							blockhash: hashes[i],
+							work: results[i]
+						}
+					}
+					console.log(JSON.stringify(results, null, 4))
+				}
 				const end = performance.now()
 				if (options['debug']) console.log(end - start, 'ms total |', (end - start) / hashes.length, 'ms avg')
-				process.exit()
-			} else {
+				await browser.close()
+			} else if (!isJson) {
 				console.log(output[1])
 			}
 		} else if (options['debug']) {
@@ -125,10 +168,12 @@ if (options['debug']) console.log(`${fn} options`, JSON.stringify(options))
 				createScript: string => string,
 			})
 			${NanoPow}
+			window.results = []
 			const hashes = ["${hashes.join('","')}"]
 			for (const hash of hashes) {
 				try {
 					const work = await NanoPow.${fn}(${work}hash, ${JSON.stringify(options)})
+					window.results.push(work)
 					console.log(\`cli \${work}\`)
 				} catch (err) {
 					console.error(\`cli \${err}\`)
diff --git a/nano-pow.1 b/nano-pow.1
index a804f90..aa67ac2 100644
--- a/nano-pow.1
+++ b/nano-pow.1
@@ -24,6 +24,9 @@ Show this help dialog and exit.
 \fB\-d\fR, \fB\-\-debug\fR
 Enable additional logging output.
 .TP
+\fB\-j\fR, \fB\-\-json\fR
+Format output as JSON.
+.TP
 \fB\-e\fR, \fB\-\-effort\fR=\fIEFFORT\fR
 Increase demand on GPU processing. Must be between 1 and 32 inclusive.
 .TP
@@ -47,9 +50,9 @@ $ nano-pow \fB\-t fffffe00 \-e 32 0123456789abcdef0123456789abcdef0123456789abcd
 .EE
 
 .PP
-Search for multiple nonces from a file:
+Read one or more lines of blockhashes from a file and output to another file:
 .EX
-$ nano-pow \fB$(cat /path/to/file.txt)\fR
+$ cat /path/to/file.txt | nano-pow --json > work.json
 .EE
 
 .PP
@@ -70,4 +73,5 @@ Email <bug-nano-pow@zoso.dev>.
 Copyright \(co 2025 Chris Duncan <chris@zoso.dev>
 Nano PoW documentation: <https://docs.nano.org/integration-guides/work-generation/#work-calculation-details>
 License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
+Portions of this code are also provided under the MIT License: <https://spdx.org/licenses/MIT.html>
 .EE
diff --git a/package.json b/package.json
index fdd38b0..c754f61 100644
--- a/package.json
+++ b/package.json
@@ -8,6 +8,7 @@
 		"cash",
 		"crypto",
 		"currency",
+		"cryptocurrency",
 		"coin",
 		"nonce",
 		"pow",
@@ -32,8 +33,7 @@
 	],
 	"main": "dist/main.min.js",
 	"browser": {
-		"dist/main.min.js": true,
-		"node:worker_threads": false
+		"./dist/main.min.js": true
 	},
 	"bin": "./dist/cli.js",
 	"man": "./dist/nano-pow.1",
@@ -53,10 +53,12 @@
 		"typescript": "^5.8.2"
 	},
 	"type": "module",
-	"exports": [
-		"./dist/main.min.js",
-		"./dist/types.d.ts"
-	],
+	"exports": {
+		".": {
+			"types": "./dist/types.d.ts",
+			"default": "./dist/main.min.js"
+		}
+	},
 	"types": "./dist/types.d.ts",
 	"unpkg": "./dist/main.min.js",
 	"optionalDependencies": {
diff --git a/src/shaders/gl-draw.ts b/src/shaders/gl-draw.ts
index 2f230e8..ca113f5 100644
--- a/src/shaders/gl-draw.ts
+++ b/src/shaders/gl-draw.ts
@@ -26,6 +26,66 @@ layout(std140) uniform WORK {
 	uvec2 seed;
 };
 
+/**
+* Initialization vector defined by BLAKE2. Each uvec2 represents two halves
+* of the original u64 value from the reference implementation. They appear
+* reversed pairwise as defined below, but this is an illusion due to endianness:
+* the \`x\` component of the vector is the low bits and the \`y\` component is the
+* high bits, and if you laid the bits out individually, they would match the
+* little-endian 64-bit representation.
+*/
+const uvec2 BLAKE2B_IV[8] = uvec2[8](
+	uvec2(0xF3BCC908u, 0x6A09E667u),
+	uvec2(0x84CAA73Bu, 0xBB67AE85u),
+	uvec2(0xFE94F82Bu, 0x3C6EF372u),
+	uvec2(0x5F1D36F1u, 0xA54FF53Au),
+	uvec2(0xADE682D1u, 0x510E527Fu),
+	uvec2(0x2B3E6C1Fu, 0x9B05688Cu),
+	uvec2(0xFB41BD6Bu, 0x1F83D9ABu),
+	uvec2(0x137E2179u, 0x5BE0CD19u)
+);
+
+/**
+* Parameter block as defined in BLAKE2 section 2.8 and configured as follows:
+* maximal depth = 1, fanout = 1, digest byte length = 8
+*/
+const uvec2 BLAKE2B_PARAM = uvec2(0x01010008u, 0u);
+
+/**
+* Message input length which is always 40 for Nano.
+* 8 nonce bytes + 32 block hash bytes
+*/
+const uvec2 BLAKE2B_INLEN = uvec2(0x00000028u, 0u);
+
+/**
+* Finalization flag as defined in BLAKE2 section 2.4 and set to ~0 since this is
+* the final (and only) message block being hashed.
+*/
+const uvec2 BLAKE2B_FINAL = uvec2(0xFFFFFFFFu, 0xFFFFFFFFu);
+
+/**
+* Fully initialized state array that is locally copied at each thread start.
+* Application of each XOR is defined by BLAKE2 section 2.4 compression function.
+*/
+const uvec2 BLAKE2B_INIT[16] = uvec2[16](
+	BLAKE2B_IV[0u] ^ BLAKE2B_PARAM,
+	BLAKE2B_IV[1u],
+	BLAKE2B_IV[2u],
+	BLAKE2B_IV[3u],
+	BLAKE2B_IV[4u],
+	BLAKE2B_IV[5u],
+	BLAKE2B_IV[6u],
+	BLAKE2B_IV[7u],
+	BLAKE2B_IV[0u],
+	BLAKE2B_IV[1u],
+	BLAKE2B_IV[2u],
+	BLAKE2B_IV[3u],
+	BLAKE2B_IV[4u] ^ BLAKE2B_INLEN,
+	BLAKE2B_IV[5u],
+	BLAKE2B_IV[6u] ^ BLAKE2B_FINAL,
+	BLAKE2B_IV[7u]
+);
+
 // Defined separately from uint v[0].y below as the original value is required
 // to calculate the second uint32 of the digest for threshold comparison
 const uint BLAKE2B_IV32_1 = 0x6A09E667u;
@@ -37,36 +97,6 @@ const uvec4 ROTATE_16 = uvec4(16u);
 const uvec4 ROTATE_24 = uvec4(24u);
 const uvec4 ROTATE_31 = uvec4(31u);
 
-// Both buffers represent 16 uint64s as 32 uint32s
-// because that's what GLSL offers, just like Javascript
-
-// Compression buffer, intialized to 2 instances of the initialization vector
-// The following values have been modified from the BLAKE2B_IV:
-// OUTLEN is constant 8 bytes
-// v[0] ^= 0x01010000u ^ uint(OUTLEN);
-// INLEN is constant 40 bytes: work value (8) + block hash (32)
-// v[12] ^= uint(INLEN);
-// It's always the "last" compression at this INLEN
-// v[14] = ~v[14];
-const uvec2 blake2b_iv[16] = uvec2[16](
-	uvec2(0xF2BDC900u, 0x6A09E667u),
-	uvec2(0x84CAA73Bu, 0xBB67AE85u),
-	uvec2(0xFE94F82Bu, 0x3C6EF372u),
-	uvec2(0x5F1D36F1u, 0xA54FF53Au),
-	uvec2(0xADE682D1u, 0x510E527Fu),
-	uvec2(0x2B3E6C1Fu, 0x9B05688Cu),
-	uvec2(0xFB41BD6Bu, 0x1F83D9ABu),
-	uvec2(0x137E2179u, 0x5BE0CD19u),
-	uvec2(0xF3BCC908u, 0x6A09E667u),
-	uvec2(0x84CAA73Bu, 0xBB67AE85u),
-	uvec2(0xFE94F82Bu, 0x3C6EF372u),
-	uvec2(0x5F1D36F1u, 0xA54FF53Au),
-	uvec2(0xADE682F9u, 0x510E527Fu),
-	uvec2(0x2B3E6C1Fu, 0x9B05688Cu),
-	uvec2(0x04BE4294u, 0xE07C2654u),
-	uvec2(0x137E2179u, 0x5BE0CD19u)
-);
-
 // Iterated initialization vector
 uvec2 v[16];
 
@@ -120,7 +150,7 @@ void main() {
 	m[4u] = uvec2(blockhash[6u], blockhash[7u]);
 
 	// Reset v
-	v = blake2b_iv;
+	v = BLAKE2B_INIT;
 
 	// Twelve rounds of G mixing
 
@@ -198,7 +228,7 @@ void main() {
 
 	// Pixel data set from work seed values
 	// Finalize digest from high bits, low bits can be safely ignored
-	if ((BLAKE2B_IV32_1 ^ v[0u].y ^ v[8u].y) >= threshold && (search || uvec2(gl_FragCoord) == uvec2(0u))) {
+	if ((BLAKE2B_INIT[0u].y ^ v[0u].y ^ v[8u].y) >= threshold && (search || uvec2(gl_FragCoord) == uvec2(0u))) {
 		nonce = uvec4(1u, m[0u].y, m[0u].x, (uint(gl_FragCoord.x) << 16u) | uint(gl_FragCoord.y));
 	}
 
diff --git a/test-seeds b/test-blockhashes
similarity index 100%
rename from test-seeds
rename to test-blockhashes
-- 
2.34.1