From: Chris Duncan Date: Tue, 14 Jan 2025 18:47:56 +0000 (-0800) Subject: Replace more 32-bit scalar rotations with vector rotations. X-Git-Tag: v2.0.0~119 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=16c60de6f5ac874dc1c12539793a6c8c81c06a03;p=nano-pow.git Replace more 32-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index b26bb20..ad68c43 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -608,10 +608,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -1357,10 +1360,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -2091,10 +2097,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -2834,10 +2843,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -3583,10 +3595,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + 
v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -4335,10 +4350,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -5081,10 +5099,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -5824,10 +5845,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -6567,10 +6591,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -7310,10 +7337,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -8056,10 +8086,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 
= xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22; @@ -8805,10 +8838,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v2; - xor1 = v25 ^ v3; - v24 = xor1; - v25 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2425.x = v24; + v_2425.y = v25; + v_2425 = v_2425 ^ v_23; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_2223.x = v22;