From: Chris Duncan Date: Tue, 14 Jan 2025 21:41:45 +0000 (-0800) Subject: The 32-bit rotate was not correctly or completely implemented like the other rotation... X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=7a36229a8f1c6125c54dcc2f25c688e94af640e2;p=nano-pow.git The 32-bit rotate was not correctly or completely implemented like the other rotations due to getting fooled by the simplicity. Start fixing starting with v[24-25]. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 0808814..b89fe92 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -108,9 +108,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d v_2425.x = v24; @@ -574,9 +575,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -1364,9 +1366,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -2154,9 +2157,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -2943,9 +2947,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 
^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -3732,9 +3737,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -4517,9 +4523,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -5305,9 +5312,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -6093,9 +6101,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -6881,9 +6890,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -7671,9 +7681,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -8455,9 +8466,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - 
v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d