From: Chris Duncan Date: Tue, 14 Jan 2025 21:03:10 +0000 (-0800) Subject: Start replacing m scalar operations. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=4cd24b5466b3a17f7eccabf1607a7588cf6f83ac;p=nano-pow.git Start replacing m scalar operations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index f213234..a93a421 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -170,11 +170,7 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v_01.x + m2; - o1 = v_01.y + m3; - o1 = o1 + select(0u, 1u, o0 < v_01.x); - v_01.x = o0; - v_01.y = o1; + v_01 = v_01 + vec2(m2, m3) + select(vec2(0u), vec2(0u, 1u), v_01.x + m2 < v_01.x); @@ -220,20 +216,16 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + b v_23.x = v2; v_23.y = v3; - - v_23 = v_23 + v_1011 + select(vec2(0u), vec2(0u, 1u), v_23.x + v_1011.x < v_23.x); v2 = v_23.x; v3 = v_23.y; - - // a = a + m[sigma[r][2*i+0]] - o0 = v2 + m4; - o1 = v3 + m5; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m4, m5) + select(vec2(0u), vec2(0u, 1u), v_23.x + m4 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 32) v_23.x = v2; @@ -277,11 +269,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m6; - o1 = v3 + m7; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m6, m7) + select(vec2(0u), vec2(0u, 1u), v_23.x + m6 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2; @@ -334,11 +326,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v4 + m8; - o1 = v5 + m9; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_45 = v_45 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_45.x + m8 < v_45.x); + v4 = v_45.x; + v5 = v_45.y; // d = rotr64(d ^ a, 32) v_45.x = v4; @@ -1011,11 +1003,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v2 + m8; - o1 = v3 + m9; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_23.x + m8 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 32) v_23.x = v2; @@ -1304,11 +1296,8 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v_01.x + m2; - o1 = v_01.y + m3; - o1 = o1 + select(0u, 1u, o0 < v_01.x); - v_01.x = o0; - v_01.y = o1; + v_01 = v_01 + vec2(m2, m3) + select(vec2(0u), vec2(0u, 1u), v_01.x + m2 < v_01.x); + @@ -1453,11 +1442,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m4; - o1 = v3 + m5; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m4, m5) + select(vec2(0u), vec2(0u, 1u), v_23.x + m4 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2; @@ -2195,11 +2184,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v2 + m6; - o1 = v3 + m7; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m6, m7) + select(vec2(0u), vec2(0u, 1u), v_23.x + m6 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 32) v_23.x = v2; @@ -2593,11 +2582,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v2 + m6; - o1 = v3 + m7; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m6, m7) + select(vec2(0u), vec2(0u, 1u), v_23.x + m6 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 32) v_23.x = v2; @@ -3088,11 +3077,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v4 + m8; - o1 = v5 + m9; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_45 = v_45 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_45.x + m8 < v_45.x); + v4 = v_45.x; + v5 = v_45.y; // d = rotr64(d ^ a, 32) v_45.x = v4; @@ -3536,11 +3525,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v4 + m8; - o1 = v5 + m9; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_45 = v_45 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_45.x + m8 < v_45.x); + v4 = v_45.x; + v5 = v_45.y; // d = rotr64(d ^ a, 16) v_45.x = v4; @@ -3730,11 +3719,7 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v_01.x + m2; - o1 = v_01.y + m3; - o1 = o1 + select(0u, 1u, o0 < v_01.x); - v_01.x = o0; - v_01.y = o1; + v_01 = v_01 + vec2(m2, m3) + select(vec2(0u), vec2(0u, 1u), v_01.x + m2 < v_01.x); @@ -5415,11 +5400,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m6; - o1 = v3 + m7; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m6, m7) + select(vec2(0u), vec2(0u, 1u), v_23.x + m6 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2; @@ -6207,11 +6192,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m8; - o1 = v3 + m9; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_23.x + m8 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2; @@ -7100,11 +7085,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v4 + m8; - o1 = v5 + m9; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_45 = v_45 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_45.x + m8 < v_45.x); + v4 = v_45.x; + v5 = v_45.y; // d = rotr64(d ^ a, 16) v_45.x = v4; @@ -7399,11 +7384,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m8; - o1 = v3 + m9; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_23.x + m8 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2; @@ -8101,11 +8086,7 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v_01.x + m2; - o1 = v_01.y + m3; - o1 = o1 + select(0u, 1u, o0 < v_01.x); - v_01.x = o0; - v_01.y = o1; + v_01 = v_01 + vec2(m2, m3) + select(vec2(0u), vec2(0u, 1u), v_01.x + m2 < v_01.x); @@ -8160,11 +8141,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v2 + m4; - o1 = v3 + m5; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m4, m5) + select(vec2(0u), vec2(0u, 1u), v_23.x + m4 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 32) v_23.x = v2; @@ -8208,11 +8189,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m6; - o1 = v3 + m7; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m6, m7) + select(vec2(0u), vec2(0u, 1u), v_23.x + m6 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2; @@ -8265,11 +8246,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v4 + m8; - o1 = v5 + m9; - o1 = o1 + select(0u, 1u, o0 < v4); - v4 = o0; - v5 = o1; + v_45.x = v4; + v_45.y = v5; + v_45 = v_45 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_45.x + m8 < v_45.x); + v4 = v_45.x; + v5 = v_45.y; // d = rotr64(d ^ a, 32) v_45.x = v4; @@ -8942,11 +8923,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v2 + m8; - o1 = v3 + m9; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m8, m9) + select(vec2(0u), vec2(0u, 1u), v_23.x + m8 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 32) v_23.x = v2; @@ -9235,11 +9216,7 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+0]] - o0 = v_01.x + m2; - o1 = v_01.y + m3; - o1 = o1 + select(0u, 1u, o0 < v_01.x); - v_01.x = o0; - v_01.y = o1; + v_01 = v_01 + vec2(m2, m3) + select(vec2(0u), vec2(0u, 1u), v_01.x + m2 < v_01.x); @@ -9384,11 +9361,11 @@ fn main(@builtin(global_invocation_id) id: vec3) { // a = a + m[sigma[r][2*i+1]] - o0 = v2 + m4; - o1 = v3 + m5; - o1 = o1 + select(0u, 1u, o0 < v2); - v2 = o0; - v3 = o1; + v_23.x = v2; + v_23.y = v3; + v_23 = v_23 + vec2(m4, m5) + select(vec2(0u), vec2(0u, 1u), v_23.x + m4 < v_23.x); + v2 = v_23.x; + v3 = v_23.y; // d = rotr64(d ^ a, 16) v_23.x = v2;