zoso.dev Git - libnemo.git/commitdiff
Combine work bytes for upload to GPU and split them in the shader. Clear performance...
author Chris Duncan <chris@zoso.dev>
Sat, 21 Dec 2024 15:25:35 +0000 (07:25 -0800)
committer Chris Duncan <chris@zoso.dev>
Sat, 21 Dec 2024 15:25:35 +0000 (07:25 -0800)
src/lib/workers/powgl.ts

index 973213d99a74989a4bb80594ac080aa9d6b6a939..af0f312c1b1c8956fe5a42e76332f255e06edee6 100644 (file)
@@ -62,12 +62,6 @@ precision highp int;
 in vec2 uv_pos;
 out vec4 fragColor;
 
-// Random work values
-// First 2 bytes will be overwritten by texture pixel position
-// Second 2 bytes will be modified if the canvas size is greater than 256x256
-uniform uvec4 u_work0;
-// Last 4 bytes remain as generated externally
-uniform uvec4 u_work1;
 // Precalculated block hash components
 uniform uint blockHash[8];
 // Threshold is 0xfffffff8 for send/change blocks and 0xfffffe00 for all else
@@ -75,6 +69,12 @@ uniform uint threshold;
 // Defines canvas size
 uniform float workload;
 
+// Random work values
+// First 2 bytes will be overwritten by texture pixel position
+// Second 2 bytes will be modified if the canvas size is greater than 256x256
+// Last 4 bytes remain as generated externally
+uniform uvec4 work[2];
+
 // Defined separately from uint v[32] below as the original value is required
 // to calculate the second uint32 of the digest for threshold comparison
 #define BLAKE2B_IV32_1 0x6A09E667u
@@ -174,6 +174,8 @@ void B2B_G (int a, int b, int c, int d, int ix, int iy) {
 
 void main() {
        int i;
+       uvec4 u_work0 = work[0];
+       uvec4 u_work1 = work[1];
        uint uv_x = uint(uv_pos.x * workload);
        uint uv_y = uint(uv_pos.y * workload);
        uint x_pos = uv_x % 256u;
@@ -221,7 +223,7 @@ void main() {
 }`
 
        /** Used to set canvas size. Must be a multiple of 256. */
-       static #WORKLOAD: number = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency) / 2)
+       static #WORKLOAD: number = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency))
 
        static #hexify (arr: number[] | Uint8Array): string {
                let out = ''
@@ -233,8 +235,7 @@ void main() {
 
        static #gl: WebGL2RenderingContext | null
        static #program: WebGLProgram | null
-       static #work0Location: WebGLUniformLocation | null
-       static #work1Location: WebGLUniformLocation | null
+       static #workLocation: WebGLUniformLocation | null
        static #blockHashLocation: WebGLUniformLocation | null
        static #thresholdLocation: WebGLUniformLocation | null
        static #workloadLocation: WebGLUniformLocation | null
@@ -303,8 +304,7 @@ void main() {
                this.#pixels = new Uint8Array(this.#gl.drawingBufferWidth * this.#gl.drawingBufferHeight * 4)
                this.#query = this.#gl.createQuery()
 
-               this.#work0Location = this.#gl.getUniformLocation(this.#program, 'u_work0')
-               this.#work1Location = this.#gl.getUniformLocation(this.#program, 'u_work1')
+               this.#workLocation = this.#gl.getUniformLocation(this.#program, 'work')
                this.#blockHashLocation = this.#gl.getUniformLocation(this.#program, "blockHash")
                this.#thresholdLocation = this.#gl.getUniformLocation(this.#program, "threshold")
                this.#workloadLocation = this.#gl.getUniformLocation(this.#program, "workload")
@@ -326,8 +326,7 @@ void main() {
                Pow.#gl.uniform1uiv(Pow.#blockHashLocation, hashBytes)
                Pow.#gl.uniform1ui(Pow.#thresholdLocation, threshold)
                Pow.#gl.uniform1f(Pow.#workloadLocation, Pow.#WORKLOAD - 1)
-               const work0 = new Uint8Array(4)
-               const work1 = new Uint8Array(4)
+               const work = new Uint8Array(8)
 
                // Draw output until success or progressCallback says to stop
                let n = 0
@@ -337,12 +336,10 @@ void main() {
                        if (Pow.#gl == null) throw new Error('WebGL 2 is required')
                        if (Pow.#query == null) throw new Error('WebGL 2 is required to run queries')
                        performance.mark('start')
-                       crypto.getRandomValues(work0)
-                       crypto.getRandomValues(work1)
+                       crypto.getRandomValues(work)
                        Pow.#gl.clear(Pow.#gl.COLOR_BUFFER_BIT)
 
-                       Pow.#gl.uniform4uiv(Pow.#work0Location, work0)
-                       Pow.#gl.uniform4uiv(Pow.#work1Location, work1)
+                       Pow.#gl.uniform4uiv(Pow.#workLocation, work)
 
                        Pow.#gl.beginQuery(Pow.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE, Pow.#query)
                        Pow.#gl.drawArrays(Pow.#gl.TRIANGLES, 0, 6)
@@ -363,6 +360,7 @@ void main() {
                                        performance.mark('end')
                                        frameTimes.push(performance.measure('draw', 'start', 'end').duration)
                                        performance.clearMarks()
+                                       performance.clearMeasures()
                                        // No valid nonce found, start the next draw call
                                        requestAnimationFrame(draw)
                                }
@@ -392,11 +390,11 @@ void main() {
                                        console.log(`Average: ${sum / count} ms`)
                                        console.log(`Harmonic: ${count / reciprocals} ms`)
                                        console.log(`Geometric: ${Math.pow(product, 1 / count)} ms`)
-                                       const hex = Pow.#hexify(work1) + Pow.#hexify([
+                                       const hex = Pow.#hexify(work.subarray(4, 8)) + Pow.#hexify([
                                                Pow.#pixels[i + 2],
                                                Pow.#pixels[i + 3],
-                                               work0[2] ^ (Pow.#pixels[i] - 1),
-                                               work0[3] ^ (Pow.#pixels[i + 1] - 1)
+                                               work[2] ^ (Pow.#pixels[i] - 1),
+                                               work[3] ^ (Pow.#pixels[i + 1] - 1)
                                        ])
                                        // Return the work value with the custom bits
                                        typeof callback === 'function' && callback(hex)