From: Chris Duncan Date: Tue, 14 Jan 2025 19:30:29 +0000 (-0800) Subject: Replace more 24-bit scalar rotations with vector rotations. X-Git-Url: https://zoso.dev/?a=commitdiff_plain;h=2346f32707372dbc597f8ad1f4938a35d05f3f08;p=nano-pow.git Replace more 24-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index a8e99a9..2267891 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -855,10 +855,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -1634,10 +1638,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -2401,10 +2409,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -3174,10 +3186,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -3959,10 +3975,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -4744,10 +4764,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -5520,10 +5544,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -6296,10 +6324,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -7072,10 +7104,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -7851,10 +7887,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -8633,10 +8673,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6; @@ -9412,10 +9456,14 @@ fn main(@builtin(global_invocation_id) id: vec3) { v29 = v_2829.y; // b = rotr64(b ^ c, 24) - xor0 = v8 ^ v18; - xor1 = v9 ^ v19; - v8 = (xor0 >> 24u) ^ (xor1 << 8u); - v9 = (xor1 >> 24u) ^ (xor0 << 8u); + v_1819.x = v18; + v_1819.y = v19; + v_89.x = v8; + v_89.y = v9; + xor = v_89 ^ v_1819; + v_89 = vec2((xor.x >> 24u) | (xor.y << 8u), (xor.y >> 24u) | (xor.x << 8u)); + v8 = v_89.x; + v9 = v_89.y; // a = a + b v_67.x = v6;