From: Chris Duncan Date: Tue, 14 Jan 2025 18:31:48 +0000 (-0800) Subject: Replace more 32-bit scalar rotations with vector rotations. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=b03be9a1ab8c9dfca564a3540f74ae13a16750e9;p=nano-pow.git Replace more 32-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index d202d42..e025ff9 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -236,10 +236,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -964,10 +967,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -1698,10 +1704,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -2417,10 +2426,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -3157,10 +3169,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -3897,10 +3912,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -4628,10 +4646,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -5362,10 +5383,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -6093,10 +6117,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -6830,10 +6857,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -7564,10 +7594,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18; @@ -8292,10 +8325,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v3 = o1; // d = rotr64(d ^ a, 32) - xor0 = v26 ^ v2; - xor1 = v27 ^ v3; - v26 = xor1; - v27 = xor0; + v_23.x = v2; + v_23.y = v3; + v_2627.x = v26; + v_2627.y = v27; + v_2627 = v_2627 ^ v_23; + v26 = v_2627.y; + v27 = v_2627.x; // c = c + d v_1819.x = v18;