From 7a36229a8f1c6125c54dcc2f25c688e94af640e2 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Tue, 14 Jan 2025 13:41:45 -0800 Subject: [PATCH] The 32-bit rotate was not implemented correctly or completely, unlike the other rotations, because its apparent simplicity was misleading. Begin fixing it, starting with v[24-25]. --- src/shaders/compute.wgsl | 84 +++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index 0808814..b89fe92 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -108,9 +108,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d v_2425.x = v24; @@ -574,9 +575,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -1364,9 +1366,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -2154,9 +2157,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -2943,9 +2947,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ 
v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -3732,9 +3737,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -4517,9 +4523,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -5305,9 +5312,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -6093,9 +6101,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -6881,9 +6890,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -7671,9 +7681,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d @@ -8455,9 +8466,10 @@ fn main(@builtin(global_invocation_id) id: vec3) { // d = rotr64(d ^ a, 32) v24 = v_2425.x; v25 = v_2425.y; - 
v_2425 = v_2425 ^ v_01; - v24 = v_2425.y; - v25 = v_2425.x; + xor = v_2425 ^ v_01; + v_2425 = vec2(xor.y, xor.x); + v24 = v_2425.x; + v25 = v_2425.y; // c = c + d -- 2.34.1