From: Chris Duncan Date: Tue, 14 Jan 2025 17:50:15 +0000 (-0800) Subject: Replace some 32-bit scalar rotations with vector rotations. X-Git-Tag: v2.0.0~125 X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=e658330eed9429b29ec4984c587d36577b2af2a7;p=nano-pow.git Replace some 32-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 48cfd75..2349da9 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -145,10 +145,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -878,10 +877,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -1608,10 +1606,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -2329,10 +2326,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -3059,10 +3055,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -3804,10 +3799,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -4537,10 +4531,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -5267,10 +5260,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -5997,10 +5989,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -6727,10 +6718,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -7463,10 +7453,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16; @@ -8196,10 +8185,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v24 ^ v0; - xor1 = v25 ^ v1; - v24 = xor1; - v25 = xor0; + v_2425 = v_2425 ^ v_01; + v24 = v_2425.y; + v25 = v_2425.x; // c = c + d v_1617.x = v16;