+++ /dev/null
-<!--
-SPDX-FileCopyrightText: 2025 Chris Duncan <chris@zoso.dev>
-SPDX-License-Identifier: GPL-3.0-or-later
--->
-
-2025-02-02 (desktop, temp vars, vec2 operations, vec2 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 154634.60000056028,
- "rate": 6.62815637585526,
- "min": 148.30000001192093,
- "max": 364.80000001192093,
- "median": 150.90000000596046,
- "arithmetic": 151.01035156304715,
- "truncated": 150.8715158928286,
- "harmonic": 150.88339631489575,
- "geometric": 150.9293154354564
- }
-}
-
-2025-02-02 (desktop, vec2 operations, temp vars, select for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 155016.60000002384,
- "rate": 6.623733858698416,
- "min": 148.2000000178814,
- "max": 405.2000000178814,
- "median": 150.90000000596046,
- "arithmetic": 151.38339843752328,
- "truncated": 150.9722493887312,
- "harmonic": 151.2095653216647,
- "geometric": 151.27158853045546
- }
-}
-
-2025-02-02 (desktop, temp vars, vec4 operations, u32 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 154134.300001204,
- "rate": 6.652845880818371,
- "min": 149.09999999403954,
- "max": 332.90000000596046,
- "median": 150.19999998807907,
- "arithmetic": 150.5217773449258,
- "truncated": 150.31161369350545,
- "harmonic": 150.42122356282434,
- "geometric": 150.45883671227364
- }
-}
-
-2025-02-02 (desktop, compress-inline, no temp vars, vec4 operations, vec2 select for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 153803.49999952316,
- "rate": 6.670591823407945,
- "min": 149.19999998807907,
- "max": 398.59999999403954,
- "median": 149.90000000596046,
- "arithmetic": 150.19873046828434,
- "truncated": 149.91173594086123,
- "harmonic": 150.04617979659852,
- "geometric": 150.09857450851368
- }
-}
-
-2025-02-02 (desktop, compress-inline, no temp vars, vec4 operations, vec4 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 153660.0999993682,
- "rate": 6.671968353041561,
- "min": 148.59999999403954,
- "max": 299.69999998807907,
- "median": 149.89999997615814,
- "arithmetic": 150.058691405633,
- "truncated": 149.88080684527355,
- "harmonic": 149.98481082698984,
- "geometric": 150.01345917720215
- }
-}
-
-2025-02-02 (desktop, compress-inline, no temp vars, vec4 operations, u32 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 153737.2999996543,
- "rate": 6.672381966975314,
- "min": 148.10000002384186,
- "max": 399.90000000596046,
- "median": 149.80000001192093,
- "arithmetic": 150.1340820309124,
- "truncated": 149.8715158918449,
- "harmonic": 149.98033249642677,
- "geometric": 150.0330563607954
- }
-}
-
-2025-02-02 (desktop, compress, temp vars, vec4 operations, vec4 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 153746.30000066757,
- "rate": 6.672626894103991,
- "min": 147.19999998807907,
- "max": 413,
- "median": 149.80000001192093,
- "arithmetic": 150.14287109440193,
- "truncated": 149.8660146701161,
- "harmonic": 149.97814196658746,
- "geometric": 150.03380952128336
- }
-}
-
-2025-02-02 (desktop, main, vec2 operations, no temp vars, u32 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 1024,
- "total": 153949.80000004172,
- "rate": 6.663815400905096,
- "min": 149.2999999821186,
- "max": 334.40000000596046,
- "median": 149.90000000596046,
- "arithmetic": 150.34160156254075,
- "truncated": 150.06418092916823,
- "harmonic": 150.2393007860643,
- "geometric": 150.2775205781935
- }
-}
-
-
-
------
-
-
-
-2025-02-01 (laptop, main, vec2 operations, no temp vars, u32 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 128,
- "total": 215110.39999985695,
- "rate": 0.5826656955575709,
- "min": 1231.8999999761581,
- "max": 1940.1000000238419,
- "median": 1731.3999999761581,
- "arithmetic": 1680.5499999988824,
- "truncated": 1716.2499999988313,
- "harmonic": 1658.1870357863268,
- "geometric": 1670.1939377722429
- }
-}
-
-2025-02-01 (desktop, main, vec2 operations, u32 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 4096,
- "total": 619720.5000002831,
- "rate": 6.6117984273376305,
- "min": 148,
- "max": 363.09999999403954,
- "median": 151.20000000298023,
- "arithmetic": 151.29895019538162,
- "truncated": 151.24478021975474,
- "harmonic": 151.2643921172073,
- "geometric": 151.27733674927475
- }
-}
-
-2025-02-01 (laptop, main, u32 type cast for carries, zero threshold, no early return)
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 128,
- "total": 223308.59999984503,
- "rate": 0.55975739455978,
- "min": 1235.5999999642372,
- "max": 2072.899999976158,
- "median": 1810.2000000476837,
- "arithmetic": 1744.5984374987893,
- "truncated": 1786.4882352942345,
- "harmonic": 1717.3672232961123,
- "geometric": 1732.0991790493958
- }
-}
-
-
-
-main
-
-2025-01-31
-{
- "NanoPow (WebGPU) | Effort: 16 | Dispatch: 16777216 | Threads: 1073741824": {
- "count": 4096,
- "total": 1068449.1999994218,
- "rate": 3.8973748396630357,
- "min": 12.5,
- "max": 2738.4000000059605,
- "median": 315.40000000596046,
- "arithmetic": 260.85185546860885,
- "truncated": 256.5829670328706,
- "harmonic": 98.0229216514215,
- "geometric": 168.12396106469913
- }
-}
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 4096,
- "total": 1069178.7000000179,
- "rate": 3.909655452661306,
- "min": 3.699999988079071,
- "max": 2211.300000011921,
- "median": 316.59999999403954,
- "arithmetic": 261.02995605469187,
- "truncated": 255.77701465209654,
- "harmonic": 73.26505112089419,
- "geometric": 156.17227103593234
- }
-}
-
-
-
-2025-01-30
-{
- "NanoPow (WebGPU) | Effort: 16 | Dispatch: 16777216 | Threads: 1073741824": {
- "count": 4096,
- "total": 1101947.3999997675,
- "rate": 3.8753692602297347,
- "min": 13.100000023841858,
- "max": 2062.699999988079,
- "median": 319.7000000178814,
- "arithmetic": 269.03012695306825,
- "truncated": 258.03992673996675,
- "harmonic": 99.43298937394798,
- "geometric": 171.24141726167608
- }
-}
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 4096,
- "total": 1090902.8999999762,
- "rate": 3.799836314741894,
- "min": 3.5999999940395355,
- "max": 2885.5,
- "median": 321.40000000596046,
- "arithmetic": 266.3337158203067,
- "truncated": 263.16923076933267,
- "harmonic": 70.99602668653543,
- "geometric": 156.60774106488077
- }
-}
-
-
-
------
-
-
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 138372.20000001788,
- "rate": 3.919394953010938,
- "min": 3.7999999970197678,
- "max": 1618.9000000059605,
- "arithmetic": 270.2582031250349,
- "truncated": 255.14142156859825,
- "harmonic": 72.1287653118942,
- "geometric": 157.72414891838542
- }
-}
-
-Zero threshold benchmark (vectors)
-
-16.0569471600238 average on 512 passes 8 effort when logging mapAsync only
-118.135686272967 average on 256 passes 24 effort when logging mapAsync only
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 1851.7000002861023,
- "rate": 282.7638780600227,
- "min": 3.300000011920929,
- "max": 8.5,
- "arithmetic": 3.6166015630587935,
- "truncated": 3.5365196108525874,
- "harmonic": 3.5909492641537284,
- "geometric": 3.6023994761114566
- }
-}
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 2059.3000002503395,
- "rate": 273.1655059917369,
- "min": 3.199999988079071,
- "max": 13.399999976158142,
- "arithmetic": 4.022070312988944,
- "truncated": 3.6607843159752735,
- "harmonic": 3.791171787620167,
- "geometric": 3.8764125112547805
- }
-}
-
-
-
------
-
-
-
-iPhone
-88.7974 geometric mean on 256 passes 24 effort
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 10057.50000011921,
- "rate": 64.04963847019617,
- "min": 15.399999976158142,
- "max": 36.89999997615814,
- "arithmetic": 19.64355468773283,
- "truncated": 15.612890624906868,
- "harmonic": 19.455959597968477,
- "geometric": 19.54743125733686
- }
-}
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 10370.999999821186,
- "rate": 62.232594321365944,
- "min": 17,
- "max": 88.5,
- "arithmetic": 20.255859374650754,
- "truncated": 16.068749999976717,
- "harmonic": 20.060722114122232,
- "geometric": 20.134613393434872
- }
-}
-
-{
- "NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 4096,
- "total": 75452.59999632835,
- "rate": 67.94629986572919,
- "min": 13.400000035762787,
- "max": 37.60000002384186,
- "arithmetic": 18.421044920978602,
- "truncated": 14.717504882180947,
- "harmonic": 18.21353217876272,
- "geometric": 18.31703265536469
- }
-}
-
-
-
------
-
-
-
-Super benchmarks
-
-{
- (16,16) "NanoPow (WebGPU) | Effort: 4 | Dispatch: 1048576 | Threads: 67108864": {
- "count": 4096,
- "total": 1089734.8000002578,
- "rate": 4.829357209724284,
- "min": 5.899999998509884,
- "max": 2568.10000000149,
- "arithmetic": 266.04853515631294,
- "truncated": 207.06689453131003,
- "harmonic": 81.18982598620626,
- "geometric": 161.6413047635922
- }
-}
-
-{
- (8,8) "NanoPow (WebGPU) | Effort: 16 | Dispatch: 16777216 | Threads: 1073741824": {
- "count": 4096,
- "total": 1083644.400000088,
- "rate": 4.819279376542908,
- "min": 20.399999998509884,
- "max": 2163.1999999955297,
- "arithmetic": 264.56162109377146,
- "truncated": 207.49990234377037,
- "harmonic": 112.25554889616,
- "geometric": 175.89628874036063
- }
-}
-
-
-
------
-
-
-
-After fixing test page bug setting Effort incorrectly
-
-16,16
------
-"NanoPow (WebGPU) | Effort: 16 | Dispatch: 16777216 | Threads: 1073741824": {
- "count": 512,
- "total": 164300.40000003576,
- "rate": 3.8249916888598743,
- "min": 77.30000000074506,
- "max": 1980.5,
- "arithmetic": 320.89921875006985,
- "truncated": 261.4384765625655,
- "harmonic": 198.34621659194366,
- "geometric": 246.96739280242824
- }
-
-"NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 133643.70000002906,
- "rate": 5.0202476786249735,
- "min": 20.699999999254942,
- "max": 1318.199999999255,
- "arithmetic": 261.02285156255675,
- "truncated": 199.1933593750291,
- "harmonic": 106.89582610328786,
- "geometric": 171.1679993707545
- }
-
-"NanoPow (WebGPU) | Effort: 4 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 130945.59999998286,
- "rate": 4.937419176281642,
- "min": 7,
- "max": 1650.7999999970198,
- "arithmetic": 255.75312499996653,
- "truncated": 202.53496093744616,
- "harmonic": 78.50458783656828,
- "geometric": 155.70211798018283
- }
-
-
-8,8
------
-"NanoPow (WebGPU) | Effort: 32 | Dispatch: 67108864 | Threads: 4294967296": {
- "count": 512,
- "total": 168521.00000004843,
- "rate": 3.871414593719482,
- "min": 76.30000000074506,
- "max": 2497.300000000745,
- "arithmetic": 329.1425781250946,
- "truncated": 258.3035156250844,
- "harmonic": 209.53630679307048,
- "geometric": 259.087912072887
- }
-
-"NanoPow (WebGPU) | Effort: 16 | Dispatch: 16777216 | Threads: 1073741824": {
- "count": 512,
- "total": 128736.9999999404,
- "rate": 4.969079128705403,
- "min": 20.800000000745058,
- "max": 1565.699999999255,
- "arithmetic": 251.43945312488358,
- "truncated": 201.24453124990396,
- "harmonic": 106.21772967372581,
- "geometric": 167.1924244801166
- }
-
-"NanoPow (WebGPU) | Effort: 8 | Dispatch: 4194304 | Threads: 268435456": {
- "count": 512,
- "total": 133008.6999999974,
- "rate": 4.771005703775073,
- "min": 6.5,
- "max": 1757.5999999996275,
- "arithmetic": 259.7826171874949,
- "truncated": 209.5994140624789,
- "harmonic": 77.38622139525103,
- "geometric": 155.94476005599634
- }
-
-"NanoPow (WebGPU) | Effort: 1 | Dispatch: 65536 | Threads: 4194304": {
- "count": 512,
- "total": 178794.09999999218,
- "rate": 3.724110341315865,
- "min": 2.699999999254942,
- "max": 3011.2999999988824,
- "arithmetic": 349.2072265624847,
- "truncated": 268.5205078125218,
- "harmonic": 79.8080055571974,
- "geometric": 202.6704071324123
- }
-
-
-
------
-
-
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 256)
-{
- "count": 512,
- "total": 159978.0000000596,
- "rate": 3.8420526166080182,
- "min": 81,
- "max": 1383.3999999910593,
- "arithmetic": 312.4570312501164,
- "truncated": 260.277539062663,
- "harmonic": 196.8969789336903,
- "geometric": 244.9313792773437
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 256)
-{
- "count": 512,
- "total": 167904.10000032187,
- "rate": 3.772403249271633,
- "min": 77.30000001192093,
- "max": 1771.5,
- "arithmetic": 327.93769531312864,
- "truncated": 265.08300781287835,
- "harmonic": 209.60304208880888,
- "geometric": 258.285687628797
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 256)
-{
- "count": 512,
- "total": 165119.60000000894,
- "rate": 3.793500222645779,
- "min": 79.90000000596046,
- "max": 2104.0999999940395,
- "arithmetic": 322.49921875001746,
- "truncated": 263.6087890625058,
- "harmonic": 207.56200329474862,
- "geometric": 256.16574636086983
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 256)
-{
- "count": 512,
- "total": 163759.80000016093,
- "rate": 3.8946941638158328,
- "min": 79.90000000596046,
- "max": 2280,
- "arithmetic": 319.8433593753143,
- "truncated": 256.75957031251164,
- "harmonic": 204.43707515011045,
- "geometric": 252.168610285593
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 64)
-{
- "count": 512,
- "total": 150115.60000014305,
- "rate": 4.336664038122414,
- "min": 24.5,
- "max": 1650.6000000089407,
- "arithmetic": 293.1945312502794,
- "truncated": 230.5919921878376,
- "harmonic": 123.67804479239821,
- "geometric": 194.04155076078126
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 64)
-{
- "count": 512,
- "total": 138818.7000001073,
- "rate": 4.618541821704324,
- "min": 24.599999994039536,
- "max": 1552.699999988079,
- "arithmetic": 271.13027343770955,
- "truncated": 216.51855468767462,
- "harmonic": 121.22290429039016,
- "geometric": 185.89186266835625
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 135636.89999975264,
- "rate": 4.719968508971256,
- "min": 25.900000005960464,
- "max": 1588,
- "arithmetic": 264.9158203120169,
- "truncated": 211.8658203119994,
- "harmonic": 124.27757176664986,
- "geometric": 181.82369853819247
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 64)
-{
- "count": 512,
- "total": 140658.69999991357,
- "rate": 4.752414960082367,
- "min": 23.700000002980232,
- "max": 1747.2000000029802,
- "arithmetic": 274.7240234373312,
- "truncated": 210.41933593750582,
- "harmonic": 110.33702507461787,
- "geometric": 175.14808439595487
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 32)
-{
- "count": 512,
- "total": 141774.90000011027,
- "rate": 4.605960941090885,
- "min": 16.100000008940697,
- "max": 1443.2999999970198,
- "arithmetic": 276.90410156271537,
- "truncated": 217.10996093752328,
- "harmonic": 114.98646010749061,
- "geometric": 184.62279065842682
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 32)
-{
- "count": 512,
- "total": 143074.00000026822,
- "rate": 4.8437740946897145,
- "min": 19.399999991059303,
- "max": 1366.8999999910593,
- "arithmetic": 279.44140625052387,
- "truncated": 206.4505859380006,
- "harmonic": 111.60267816366408,
- "geometric": 183.1466099827166
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 32)
-{
- "count": 512,
- "total": 144550.89999994636,
- "rate": 4.644462748147642,
- "min": 18.5,
- "max": 1449.6000000089407,
- "arithmetic": 282.3259765623952,
- "truncated": 215.31015624979045,
- "harmonic": 114.96709424095182,
- "geometric": 186.24836896995765
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 32)
-{
- "count": 512,
- "total": 145009.69999994338,
- "rate": 4.465056571918978,
- "min": 14.200000002980232,
- "max": 2710.2999999970198,
- "arithmetic": 283.2220703123894,
- "truncated": 223.9613281249476,
- "harmonic": 105.73248510637895,
- "geometric": 178.59991348235997
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 16,16)
-{
- "count": 512,
- "total": 171064.90000003576,
- "rate": 3.6180268186231834,
- "min": 81.3999999910593,
- "max": 1453.3999999910593,
- "arithmetic": 334.11113281256985,
- "truncated": 276.39375000004657,
- "harmonic": 217.83822531129073,
- "geometric": 267.8978578209064
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 16,16)
-{
- "count": 512,
- "total": 167172.10000005364,
- "rate": 3.8445942901746104,
- "min": 78.29999999701977,
- "max": 3207.6000000089407,
- "arithmetic": 326.5080078126048,
- "truncated": 260.1054687501455,
- "harmonic": 210.8258370543174,
- "geometric": 257.37154756448814
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 16,16)
-{
- "count": 512,
- "total": 161936.60000008345,
- "rate": 3.740251239688252,
- "min": 80.6000000089407,
- "max": 1553.8999999910593,
- "arithmetic": 316.282421875163,
- "truncated": 267.3617187500349,
- "harmonic": 204.59200263160372,
- "geometric": 250.96434072996382
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 16,16)
-{
- "count": 512,
- "total": 158915.09999985993,
- "rate": 3.7711889999175563,
- "min": 81,
- "max": 2144.199999988079,
- "arithmetic": 310.3810546872264,
- "truncated": 265.1683593746857,
- "harmonic": 201.944298796899,
- "geometric": 246.41513206426896
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 8,8)
-{
- "count": 512,
- "total": 139888.0000000596,
- "rate": 4.623788851791469,
- "min": 24.100000008940697,
- "max": 1754.5,
- "arithmetic": 273.2187500001164,
- "truncated": 216.27285156253492,
- "harmonic": 124.13430182526332,
- "geometric": 186.66857347407046
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 8,8)
-{
- "count": 512,
- "total": 141471.0000000447,
- "rate": 4.8109313380457674,
- "min": 25.799999997019768,
- "max": 1512.4000000059605,
- "arithmetic": 276.3105468750873,
- "truncated": 207.8599609376688,
- "harmonic": 127.00893713470423,
- "geometric": 192.06862657670237
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 8,8)
-{
- "count": 4096,
- "total": 1061022.09999986,
- "rate": 4.8102132476979635,
- "min": 5.5,
- "max": 2067.7000000029802,
- "arithmetic": 259.0385986327783,
- "truncated": 207.8909912109557,
- "harmonic": 75.27207858671284,
- "geometric": 156.19893668278434
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 8,8)
-{
- "count": 512,
- "total": 133226.09999994934,
- "rate": 4.905215336077563,
- "min": 24.599999994039536,
- "max": 1432.3999999910593,
- "arithmetic": 260.20722656240105,
- "truncated": 203.86464843756403,
- "harmonic": 116.05589569169062,
- "geometric": 178.05592700404114
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 8,8)
-{
- "count": 512,
- "total": 146197.10000024736,
- "rate": 4.400326586729279,
- "min": 25.399999991059303,
- "max": 1748.5,
- "arithmetic": 285.5412109379831,
- "truncated": 227.25585937549477,
- "harmonic": 130.29367565620703,
- "geometric": 196.68629173860154
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 8,4)
-{
- "count": 512,
- "total": 146798.40000000596,
- "rate": 4.501937943601046,
- "min": 18.099999994039536,
- "max": 1979.1000000089407,
- "arithmetic": 286.71562500001164,
- "truncated": 222.12656249990687,
- "harmonic": 103.21952194085847,
- "geometric": 176.78141792063872
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 8,4)
-{
- "count": 512,
- "total": 138210.7000002265,
- "rate": 4.76931689781462,
- "min": 16.799999997019768,
- "max": 1626.0999999940395,
- "arithmetic": 269.9427734379424,
- "truncated": 209.67363281274447,
- "harmonic": 101.34635015953711,
- "geometric": 172.06200959967907
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 8,4)
-{
- "count": 512,
- "total": 149949.19999992847,
- "rate": 4.415311818463056,
- "min": 16.799999997019768,
- "max": 1790.7999999970198,
- "arithmetic": 292.8695312498603,
- "truncated": 226.48457031243015,
- "harmonic": 112.43872189933657,
- "geometric": 189.39141120585325
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 8,4)
-{
- "count": 512,
- "total": 147114.90000009537,
- "rate": 4.581196336470157,
- "min": 17.600000008940697,
- "max": 3584.5999999940395,
- "arithmetic": 287.33378906268626,
- "truncated": 218.28359375020955,
- "harmonic": 118.69066922246898,
- "geometric": 188.03357141542313
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x2000 workgroup 4,4)
-{
- "count": 512,
- "total": 275257.4999998361,
- "rate": 2.5033687912081977,
- "min": 16.700000002980232,
- "max": 4439.20000000298,
- "arithmetic": 537.6123046871799,
- "truncated": 399.461718749546,
- "harmonic": 179.34949948078622,
- "geometric": 340.32029726440055
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 4,4)
-{
- "count": 512,
- "total": 265632.5000000298,
- "rate": 2.4961071453998693,
- "min": 19,
- "max": 4817.0999999940395,
- "arithmetic": 518.8134765625582,
- "truncated": 400.6238281248952,
- "harmonic": 152.2455042043822,
- "geometric": 300.11657404770995
-}
-
------
-
-
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 256)
-{
- "count": 2048,
- "total": 547002.6999999285,
- "rate": 3.744040020278269,
- "min": 22.599999994039536,
- "max": 2720.5,
- "arithmetic": 267.0911621093401,
- "truncated": 207.97036132813082,
- "harmonic": 126.12851499707206,
- "geometric": 183.20580886813133
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 256)
-{
- "count": 2048,
- "total": 543507.1000000834,
- "rate": 3.768120048477169,
- "min": 23.400000005960464,
- "max": 2603.699999988079,
- "arithmetic": 265.38432617191575,
- "truncated": 213.63740234381112,
- "harmonic": 130.06039992520343,
- "geometric": 185.4924137470012
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 256)
-{
- "count": 2048,
- "total": 556310.1000003219,
- "rate": 3.6813999961510944,
- "min": 22.299999982118607,
- "max": 2052.5,
- "arithmetic": 271.63579101578216,
- "truncated": 220.8329589846253,
- "harmonic": 129.12128518320455,
- "geometric": 186.29511532186783
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 128)
-{
- "count": 2048,
- "total": 548659.7999989986,
- "rate": 3.7327320135423405,
- "min": 13.699999988079071,
- "max": 2517.0999999940395,
- "arithmetic": 267.90029296826106,
- "truncated": 208.58183593714784,
- "harmonic": 117.53834727081843,
- "geometric": 177.7358800826145
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 128)
-{
- "count": 2048,
- "total": 566167.6000000024,
- "rate": 3.6173034274656324,
- "min": 15.399999999906868,
- "max": 2000.2999999998137,
- "arithmetic": 276.4490234375012,
- "truncated": 218.4107421874994,
- "harmonic": 122.29973069005368,
- "geometric": 184.8814662429658
-}
-
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 64)
-{
- "count": 2048,
- "total": 539941.3999999762,
- "rate": 3.79300420378969,
- "min": 9.5,
- "max": 2157.300000011921,
- "arithmetic": 263.64326171873836,
- "truncated": 205.91958007811627,
- "harmonic": 106.17587018373969,
- "geometric": 171.19015104475864
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 2048,
- "total": 535917.800000757,
- "rate": 3.821481577953013,
- "min": 10.300000011920929,
- "max": 2099.4000000059605,
- "arithmetic": 261.6786132816196,
- "truncated": 206.90126953163417,
- "harmonic": 108.36494901651302,
- "geometric": 171.00530036092783
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 32)
-{
- "count": 2048,
- "total": 582139.9999999758,
- "rate": 3.518054076339171,
- "min": 9.200000000186265,
- "max": 1908.5999999996275,
- "arithmetic": 284.2480468749882,
- "truncated": 218.86826171873827,
- "harmonic": 123.58316256156988,
- "geometric": 190.28283027676605
-}
-
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 64,
- "total": 14740.50000000745,
- "rate": 4.341779451169747,
- "min": 14.199999999254942,
- "max": 1088.800000000745,
- "arithmetic": 230.32031250011642,
- "truncated": 181.82343750004657,
- "harmonic": 87.12614438828882,
- "geometric": 149.3650172355075
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 256)
-{
- "count": 64,
- "total": 15690.999999988824,
- "rate": 4.078771270157771,
- "min": 10.400000002235174,
- "max": 989.8000000007451,
- "arithmetic": 245.17187499982538,
- "truncated": 186.7796874998021,
- "harmonic": 76.98798798236564,
- "geometric": 147.89606519234277
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 256)
-{
- "count": 64,
- "total": 16779.50000002235,
- "rate": 4.4576316046016,
- "min": 24.199999999254942,
- "max": 1552,
- "arithmetic": 262.17968750034925,
- "truncated": 224.33437500032596,
- "harmonic": 108.25402886637463,
- "geometric": 169.6307793361205
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 64)
-{
- "count": 64,
- "total": 19302.5,
- "rate": 4.219075495082348,
- "min": 27.699999999254942,
- "max": 1248.5,
- "arithmetic": 301.6015625,
- "truncated": 237.01874999998836,
- "harmonic": 112.89841890437856,
- "geometric": 188.28065698087113
-}
-
-
-
------
-
-
-
-nano-webgl-pow: Time to calculate proof-of-work for a send block 16 times
-Total: 89756 ms
-Average: 5609.75 ms
-Harmonic: 2092.567565254879 ms
-Geometric: 3612.112662613675 ms
-
-NanoPowGl: Time to calculate proof-of-work for a send block 16 times
-Total: 33240 ms
-Average: 2077.5 ms
-Harmonic: 1328.5635414262717 ms
-Geometric: 1663.110986923899 ms
-
-How much faster?
-Total: 56156 ms
-Average: 3532 ms
-Harmonic: 764 ms
-Geometric: 1949 ms
-
-NanoPowGl:
-Total: 22831.300000041723 ms
-Average: 3805.2166666736207 ms
-Harmonic: 928.6432328540742 ms
-Geometric: 2500.810238375608 ms
-Minimum: 193 ms
-Maximum: 8361 ms
-
-NanoPowGl: Time to calculate proof-of-work for a send block 512 times
-Total: 680948 ms
-Average: 1329.9765625 ms
-Harmonic: 749.6552658409396 ms
-
-
-
-CHROMIUM with more accurate timings
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 8192 times
-Total: 2934170.3000008166 ms
-Average: 358.17508544931843 ms
-Harmonic: 218.11823673331645 ms
-Minimum: 76.2000000178814 ms
-Maximum: 2999.9000000059605 ms
-
-
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-Total: 187428.40000000596 ms
-Average: 366.07109375001164 ms
-Harmonic: 220.70399520519166 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-Total: 187827.7999998629 ms
-Average: 366.85117187473224 ms
-Harmonic: 223.9897252426498 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining entire first G round)
-Total: 156981.3999993205 ms
-Average: 306.60429687367287 ms
-Harmonic: 128.74904701127866 ms
-Minimum: 21.700000047683716 ms
-Maximum: 1981.199999988079 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining entire first G round)
-Total: 162225.30000036955 ms
-Average: 316.8462890632218 ms
-Harmonic: 130.50451885939313 ms
-Geometric: 211.25671228925867 ms
-Minimum: 21.600000023841858 ms
-Maximum: 2267.600000023842 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining 3 rounds of G mixing)
-Total: 155547.09999996424 ms
-Average: 303.80292968743015 ms
-Harmonic: 118.19131857240315 ms
-Geometric: 196.77234360098842 ms
-Minimum: 19.5 ms
-Maximum: 2140.2000000476837 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining 5 rounds of G mixing)
-Total: 165145.19999998808 ms
-Average: 322.5492187499767 ms
-Harmonic: 126.40822610460997 ms
-Geometric: 205.28427810986508 ms
-Minimum: 20.099999964237213 ms
-Maximum: 1850.5 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining 5 rounds of G mixing and replacing if with select in original G function)
-Total: 135665.40000021458 ms
-Average: 264.9714843754191 ms
-Harmonic: 118.80915172412905 ms
-Geometric: 181.19191881133972 ms
-Minimum: 19.599999964237213 ms
-Maximum: 1908.5 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining 9 rounds of G mixing and replacing if with select in original G function)
-Total: 147481.09999907017 ms
-Average: 288.0490234356839 ms
-Harmonic: 122.25335314983455 ms
-Geometric: 192.75325397221323 ms
-Minimum: 22.19999998807907 ms
-Maximum: 1762.800000011921 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining all rounds of G mixing)
-Total: 165041.20000058413 ms
-Average: 322.34609375114087 ms
-Harmonic: 121.30652845909019 ms
-Geometric: 202.80092012876665 ms
-Minimum: 21.69999998807907 ms
-Maximum: 2303 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 512 times
-(after inlining all rounds of G mixing and all if statements replaced with select function)
-Total: 134865.20000064373 ms
-Average: 263.4085937512573 ms
-Harmonic: 108.28019222737286 ms
-Geometric: 171.8797089689105 ms
-Minimum: 20.80000001192093 ms
-Maximum: 2093.199999988079 ms
-
-NanoPow (WebGPU) 0xff
-{
- "count": 512,
- "total": 149335.80000003055,
- "min": 9.400000000372529,
- "max": 1503.300000000745,
- "arithmetic": 291.67148437505966,
- "truncated": 222.58417968753201,
- "harmonic": 106.71381226989509,
- "geometric": 186.92638314142255
-}
-
-NanoPow (WebGPU) 0xfff
-{
- "count": 512,
- "total": 164261.39999999292,
- "min": 79.5,
- "max": 1424.7000000011176,
- "arithmetic": 320.8230468749862,
- "truncated": 263.8744140625058,
- "harmonic": 209.95457211379528,
- "geometric": 256.8968599479061
-}
-
-NanoPow (WebGPU) 0x800
-{
- "count": 512,
- "total": 125924.59999999404,
- "min": 23,
- "max": 1799.1000000014901,
- "arithmetic": 245.94648437498836,
- "truncated": 198.84531250000146,
- "harmonic": 115.44432001873471,
- "geometric": 171.54249948295475
-}
-
-NanoPow (WebGPU) 0x400
-{
- "count": 512,
- "total": 132129.60000000335,
- "min": 11.799999998882413,
- "max": 2051.9000000003725,
- "arithmetic": 258.06562500000655,
- "truncated": 201.65429687500364,
- "harmonic": 86.37881890351905,
- "geometric": 156.54611901649818
-}
-
-NanoPow (WebGPU) 0x999
-{
- "count": 512,
- "total": 132693.0000000093,
- "min": 32.30000000074506,
- "max": 2258.800000000745,
- "arithmetic": 259.1660156250182,
- "truncated": 208.9763671874971,
- "harmonic": 133.4766737582568,
- "geometric": 185.94074203825846
-}
-
-NanoPow (WebGPU) 0x400
-{
- "count": 512,
- "total": 136912.30000001006,
- "min": 8.900000000372529,
- "max": 1369.9000000003725,
- "arithmetic": 267.40683593751965,
- "truncated": 196.9111328124891,
- "harmonic": 96.43707569252571,
- "geometric": 166.5867151432514
-}
-
-NanoPow (WebGPU) 3070 (0x400)
-{
- "count": 512,
- "total": 138190.59999987483,
- "min": 9.799999997019768,
- "max": 1512.5999999940395,
- "arithmetic": 269.9035156247555,
- "truncated": 201.92070312495343,
- "harmonic": 90.32084803883801,
- "geometric": 166.60778722043577
-}
-
-NanoPow (WebGPU) 3070 (0x400)
-{
- "count": 512,
- "total": 139817.9999998659,
- "rate": 3.6619033314772853,
- "min": 11.599999994039536,
- "max": 1748.4000000059605,
- "arithmetic": 273.08203124973807,
- "truncated": 202.18183593745925,
- "harmonic": 92.89356260079113,
- "geometric": 168.54025739379932
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 256)
-(after converting `v` scalars to vectors)
-{
- "count": 512,
- "total": 136717.80000030994,
- "rate": 3.74494030769102,
- "min": 11.700000002980232,
- "max": 2057.2999999970198,
- "arithmetic": 267.02695312560536,
- "truncated": 213.24863281298894,
- "harmonic": 94.06525233622521,
- "geometric": 162.9622640163259
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 127778.10000002384,
- "rate": 4.006946417264809,
- "min": 15,
- "max": 1386.9000000059605,
- "arithmetic": 249.56660156254657,
- "truncated": 176.96152343729045,
- "harmonic": 88.48986919127314,
- "geometric": 152.85173378506695
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 126437.00000026822,
- "rate": 4.04944755094564,
- "min": 8.299999982118607,
- "max": 1592.5999999940395,
- "arithmetic": 246.94726562552387,
- "truncated": 197.15312500030268,
- "harmonic": 83.44136709616109,
- "geometric": 151.69548923418716
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 32)
-{
- "count": 512,
- "total": 130610.90000012517,
- "rate": 3.920040364161868,
- "min": 16.899999976158142,
- "max": 1475.5,
- "arithmetic": 255.09941406274447,
- "truncated": 201.2101562502212,
- "harmonic": 108.50998153048026,
- "geometric": 169.9536026991119
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 133471.2999998033,
- "rate": 3.836030667272699,
- "min": 9.400000005960464,
- "max": 1703.5,
- "arithmetic": 260.68613281211583,
- "truncated": 205.00488281232538,
- "harmonic": 88.65601312209833,
- "geometric": 160.81454554342605
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64) debug mode
-{
- "count": 512,
- "total": 138230.30000001192,
- "rate": 3.70396360276984,
- "min": 11.199999988079071,
- "max": 1709.9000000059605,
- "arithmetic": 269.9810546875233,
- "truncated": 223.11933593766298,
- "harmonic": 86.31741549131624,
- "geometric": 164.2553862861393
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64) debug mode
-{
- "count": 512,
- "total": 137174.0999996364,
- "rate": 3.732483027053628,
- "min": 9.300000011920929,
- "max": 1598.0999999940395,
- "arithmetic": 267.91816406178987,
- "truncated": 189.36367187427823,
- "harmonic": 88.46031367791892,
- "geometric": 157.678025720888
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64) debug mode
-{
- "count": 512,
- "total": 140824.9999999702,
- "rate": 3.63571808982857,
- "min": 12.100000023841858,
- "max": 1640.300000011921,
- "arithmetic": 275.0488281249418,
- "truncated": 209.06249999988358,
- "harmonic": 97.42050559825007,
- "geometric": 174.1405646344008
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 256) debug mode
-{
- "count": 512,
- "total": 134499.49999976158,
- "rate": 3.806705601142812,
- "min": 24.200000017881393,
- "max": 1324.5999999940395,
- "arithmetic": 262.69433593703434,
- "truncated": 206.15488281205762,
- "harmonic": 117.14110375247057,
- "geometric": 178.66910197648477
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 135615.80000019073,
- "rate": 3.7753713062879095,
- "min": 11.300000011920929,
- "max": 1967.5,
- "arithmetic": 264.87460937537253,
- "truncated": 214.91972656268626,
- "harmonic": 95.29029745129836,
- "geometric": 166.91867186739316
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 129097.29999995232,
- "rate": 3.9660008381289855,
- "min": 9.099999994039536,
- "max": 1335.9000000059605,
- "arithmetic": 252.14316406240687,
- "truncated": 200.86796874983702,
- "harmonic": 87.70052168819247,
- "geometric": 159.12504898549835
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 129204.69999992847,
- "rate": 3.9627041431177306,
- "min": 14.900000005960464,
- "max": 1968.5,
- "arithmetic": 252.3529296873603,
- "truncated": 205.91660156229045,
- "harmonic": 97.46470054285273,
- "geometric": 162.08582278945588
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 512,
- "total": 140579.30000019073,
- "rate": 3.6420724815054943,
- "min": 11.200000017881393,
- "max": 1399.0999999940395,
- "arithmetic": 274.56894531287253,
- "truncated": 213.73515625024447,
- "harmonic": 104.79799509895179,
- "geometric": 179.31860807870936
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 256)
-{
- "count": 512,
- "total": 138712.99999949336,
- "rate": 3.691074376603996,
- "min": 24.799999982118607,
- "max": 1613.0999999940395,
- "arithmetic": 270.92382812401047,
- "truncated": 207.68144531175494,
- "harmonic": 117.38847021261739,
- "geometric": 179.3126486381509
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 64,
- "total": 13121.699999928474,
- "rate": 4.877416798154878,
- "min": 9.700000017881393,
- "max": 1030.5,
- "arithmetic": 205.0265624988824,
- "truncated": 142.53124999860302,
- "harmonic": 63.87965522037639,
- "geometric": 117.9588307327579
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x800 workgroup 64)
-{
- "count": 64,
- "total": 16865.300000220537,
- "rate": 3.7947738847908496,
- "min": 18.099999994039536,
- "max": 1783.199999988079,
- "arithmetic": 263.5203125034459,
- "truncated": 240.13593750307336,
- "harmonic": 113.00040689556182,
- "geometric": 184.71274112138641
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x400 workgroup 64)
-{
- "count": 64,
- "total": 19725,
- "rate": 3.244613434727503,
- "min": 9.400000005960464,
- "max": 1713.7999999821186,
- "arithmetic": 308.203125,
- "truncated": 248.60625000065193,
- "harmonic": 80.57924794605395,
- "geometric": 182.49585952465603
-}
-
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 64)
-{
- "count": 64,
- "total": 16167.400000214577,
- "rate": 3.958583321941102,
- "min": 9.599999994039536,
- "max": 1210.800000011921,
- "arithmetic": 252.61562500335276,
- "truncated": 208.5312500023283,
- "harmonic": 79.1288243036302,
- "geometric": 160.41093372644522
-}
-NanoPow (WebGPU) 3070 (dispatch 0x1000 workgroup 256)
-{
- "count": 64,
- "total": 16317.899999916553,
- "rate": 3.9220733060214417,
- "min": 24.400000005960464,
- "max": 858.2999999821186,
- "arithmetic": 254.96718749869615,
- "truncated": 208.06562499934807,
- "harmonic": 90.51419688879719,
- "geometric": 152.3002389506704
-}
-
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 32 times
-Total: 8909.500000029802 ms
-Average: 278.4218750009313 ms
-Harmonic: 191.49100480215873 ms
-Geometric: 232.13670548729021 ms
-Minimum: 76.69999998807907 ms
-Maximum: 641.5 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 32 times
-Total: 11805.200000077486 ms
-Average: 368.91250000242144 ms
-Harmonic: 131.36379466491744 ms
-Geometric: 228.69384924435158 ms
-Minimum: 21.900000005960464 ms
-Maximum: 1479.5 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 32 times
-(after inlining three G calls)
-Total: 11208.399999916553 ms
-Average: 350.2624999973923 ms
-Harmonic: 115.78621253028925 ms
-Geometric: 210.41080264689026 ms
-Minimum: 25 ms
-Maximum: 1249.199999988079 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 32 times
-(after inlining entire first G round)
-Total: 9778.899999797344 ms
-Average: 305.590624993667 ms
-Harmonic: 120.65186396913916 ms
-Geometric: 193.85674573632113 ms
-Minimum: 23.69999998807907 ms
-Maximum: 1752.199999988079 ms
-
-NanoPowGpu: Time to calculate proof-of-work for a send block 32 times
-(after inlining 3 rounds of G mixing)
-Total: 10425.399999856949 ms
-Average: 325.79374999552965 ms
-Harmonic: 150.5729403997282 ms
-Geometric: 231.43806657572657 ms
-Minimum: 31.900000035762787 ms
-Maximum: 954.9000000357628 ms
-
-
-NanoPow (WebGPU) iPhone 0xff
-{
- "count": 32,
- "total": 161323,
- "min": 130,
- "max": 22190,
- "arithmetic": 5041.3438,
- "truncated": 3780.2813,
- "harmonic": 1252.8660,
- "geometric": 2906.9620
-}
-
-NanoPow (WebGPU) iPad Mini 2 0x400
-{
- "count": 32,
- "total": 226661,
- "min": 169,
- "max": 31964,
- "arithmetic": 7083.1563,
- "truncated": 5208.2813,
- "harmonic": 1722.7691,
- "geometric": 4127.4107
-}
-
+++ /dev/null
-// src/shaders/compute.wgsl
-var compute_default = "struct UBO{blockhash:array<vec4<u32>,2>,random:vec2<u32>,threshold:u32};@group(0)@binding(0)var<uniform> ubo:UBO;struct WORK{nonce:vec2<u32>,found:atomic<u32>};@group(0)@binding(1)var<storage,read_write>work:WORK;var<workgroup> found:bool;const BLAKE2B_IV32_0:vec2<u32>=vec2(0xF2BDC900u,0x6A09E667u);const ROTATE_1=vec2(1u,1u);const ROTATE_8=vec2(8u,8u);const ROTATE_16=vec2(16u,16u);const ROTATE_24=vec2(24u,24u);const ROTATE_31=vec2(31u,31u);@compute @workgroup_size(64)fn search(@builtin(global_invocation_id)global_id:vec3<u32>,@builtin(local_invocation_id)local_id:vec3<u32>){found=(local_id.x==0u&&atomicLoad(&work.found)!=0u);workgroupBarrier();if(found){return;}main(global_id);}@compute @workgroup_size(1)fn validate(@builtin(global_invocation_id)global_id:vec3<u32>){main(global_id);}fn main(id:vec3<u32>){let threshold:u32=ubo.threshold;let m0:vec2<u32>=ubo.random ^ id.xy;let m1:vec2<u32>=ubo.blockhash[0u].xy;let m2:vec2<u32>=ubo.blockhash[0u].zw;let m3:vec2<u32>=ubo.blockhash[1u].xy;let m4:vec2<u32>=ubo.blockhash[1u].zw;var v0:vec2<u32>=BLAKE2B_IV32_0;var v1:vec2<u32>=vec2(0x84CAA73Bu,0xBB67AE85u);var v2:vec2<u32>=vec2(0xFE94F82Bu,0x3C6EF372u);var v3:vec2<u32>=vec2(0x5F1D36F1u,0xA54FF53Au);var v4:vec2<u32>=vec2(0xADE682D1u,0x510E527Fu);var v5:vec2<u32>=vec2(0x2B3E6C1Fu,0x9B05688Cu);var v6:vec2<u32>=vec2(0xFB41BD6Bu,0x1F83D9ABu);var v7:vec2<u32>=vec2(0x137E2179u,0x5BE0CD19u);var v8:vec2<u32>=vec2(0xF3BCC908u,0x6A09E667u);var v9:vec2<u32>=vec2(0x84CAA73Bu,0xBB67AE85u);var v10:vec2<u32>=vec2(0xFE94F82Bu,0x3C6EF372u);var v11:vec2<u32>=vec2(0x5F1D36F1u,0xA54FF53Au);var v12:vec2<u32>=vec2(0xADE682F9u,0x510E527Fu);var v13:vec2<u32>=vec2(0x2B3E6C1Fu,0x9B05688Cu);var v14:vec2<u32>=vec2(0x04BE4294u,0xE07C2654u);var v15:vec2<u32>=vec2(0x137E2179u,0x5BE0CD19u);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m0+vec2(0u,u32(v0.x+m0.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ 
v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m1+vec2(0u,u32(v0.x+m1.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m2+vec2(0u,u32(v1.x+m2.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m3+vec2(0u,u32(v1.x+m3.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m4+vec2(0u,u32(v2.x+m4.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ 
v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m4+vec2(0u,u32(v1.x+m4.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ 
v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m1+vec2(0u,u32(v0.x+m1.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v1=v1+m0+vec2(0u,u32(v1.x+m0.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v1=v1+m2+vec2(0u,u32(v1.x+m2.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v3=v3+m3+vec2(0u,u32(v3.x+m3.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ 
v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m0+vec2(0u,u32(v1.x+m0.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m2+vec2(0u,u32(v2.x+m2.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v1=v1+m3+vec2(0u,u32(v1.x+m3.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ 
v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m1+vec2(0u,u32(v2.x+m1.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v3=v3+m4+vec2(0u,u32(v3.x+m4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m3+vec2(0u,u32(v1.x+m3.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m1+vec2(0u,u32(v1.x+m1.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ 
v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m2+vec2(0u,u32(v0.x+m2.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m4+vec2(0u,u32(v2.x+m4.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m0+vec2(0u,u32(v2.x+m0.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m0+vec2(0u,u32(v0.x+m0.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ 
v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m2+vec2(0u,u32(v2.x+m2.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m4+vec2(0u,u32(v2.x+m4.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m1+vec2(0u,u32(v0.x+m1.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 
^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v3=v3+m3+vec2(0u,u32(v3.x+m3.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m2+vec2(0u,u32(v0.x+m2.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m0+vec2(0u,u32(v2.x+m0.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v3=v3+m3+vec2(0u,u32(v3.x+m3.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m4+vec2(0u,u32(v0.x+m4.x<v0.x));v15=(v15 ^ 
v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v3=v3+m1+vec2(0u,u32(v3.x+m1.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m1+vec2(0u,u32(v1.x+m1.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ 
v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v3=v3+m4+vec2(0u,u32(v3.x+m4.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m0+vec2(0u,u32(v0.x+m0.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v1=v1+m3+vec2(0u,u32(v1.x+m3.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m2+vec2(0u,u32(v2.x+m2.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 
^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m1+vec2(0u,u32(v2.x+m1.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v3=v3+m3+vec2(0u,u32(v3.x+m3.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m0+vec2(0u,u32(v0.x+m0.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ 
v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v1=v1+m4+vec2(0u,u32(v1.x+m4.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v3=v3+m2+vec2(0u,u32(v3.x+m2.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ 
v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m3+vec2(0u,u32(v2.x+m3.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v3=v3+m0+vec2(0u,u32(v3.x+m0.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m2+vec2(0u,u32(v0.x+m2.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m1+vec2(0u,u32(v2.x+m1.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m4+vec2(0u,u32(v2.x+m4.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ 
v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m2+vec2(0u,u32(v0.x+m2.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m4+vec2(0u,u32(v1.x+m4.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v3=v3+m1+vec2(0u,u32(v3.x+m1.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ 
v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v2=v2+m3+vec2(0u,u32(v2.x+m3.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v3=v3+m0+vec2(0u,u32(v3.x+m0.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m0+vec2(0u,u32(v0.x+m0.x<v0.x));v12=(v12 ^ v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v0=v0+m1+vec2(0u,u32(v0.x+m1.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)<<ROTATE_1)|((v4 ^ v8).yx>>ROTATE_31);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m2+vec2(0u,u32(v1.x+m2.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m3+vec2(0u,u32(v1.x+m3.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v2=v2+m4+vec2(0u,u32(v2.x+m4.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ 
v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)<<ROTATE_1)|((v6 ^ v10).yx>>ROTATE_31);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v15=((v15 ^ v0)>>ROTATE_16)|((v15 ^ v0).yx<<ROTATE_16);v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)<<ROTATE_1)|((v5 ^ v10).yx>>ROTATE_31);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=(v12 ^ v1).yx;v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)>>ROTATE_24)|((v6 ^ v11).yx<<ROTATE_8);v1=v1+v6+vec2(0u,u32(v1.x+v6.x<v1.x));v12=((v12 ^ v1)>>ROTATE_16)|((v12 ^ v1).yx<<ROTATE_16);v11=v11+v12+vec2(0u,u32(v11.x+v12.x<v11.x));v6=((v6 ^ v11)<<ROTATE_1)|((v6 ^ v11).yx>>ROTATE_31);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)<<ROTATE_1)|((v7 ^ v8).yx>>ROTATE_31);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=(v14 ^ v3).yx;v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)>>ROTATE_24)|((v4 ^ v9).yx<<ROTATE_8);v3=v3+v4+vec2(0u,u32(v3.x+v4.x<v3.x));v14=((v14 ^ v3)>>ROTATE_16)|((v14 ^ v3).yx<<ROTATE_16);v9=v9+v14+vec2(0u,u32(v9.x+v14.x<v9.x));v4=((v4 ^ v9)<<ROTATE_1)|((v4 ^ v9).yx>>ROTATE_31);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=(v12 ^ 
v0).yx;v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v4=((v4 ^ v8)>>ROTATE_24)|((v4 ^ v8).yx<<ROTATE_8);v0=v0+v4+vec2(0u,u32(v0.x+v4.x<v0.x));v12=((v12 ^ v0)>>ROTATE_16)|((v12 ^ v0).yx<<ROTATE_16);v8=v8+v12+vec2(0u,u32(v8.x+v12.x<v8.x));v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v1=v1+m4+vec2(0u,u32(v1.x+m4.x<v1.x));v13=(v13 ^ v1).yx;v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)>>ROTATE_24)|((v5 ^ v9).yx<<ROTATE_8);v1=v1+v5+vec2(0u,u32(v1.x+v5.x<v1.x));v13=((v13 ^ v1)>>ROTATE_16)|((v13 ^ v1).yx<<ROTATE_16);v9=v9+v13+vec2(0u,u32(v9.x+v13.x<v9.x));v5=((v5 ^ v9)<<ROTATE_1)|((v5 ^ v9).yx>>ROTATE_31);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=(v14 ^ v2).yx;v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v6=((v6 ^ v10)>>ROTATE_24)|((v6 ^ v10).yx<<ROTATE_8);v2=v2+v6+vec2(0u,u32(v2.x+v6.x<v2.x));v14=((v14 ^ v2)>>ROTATE_16)|((v14 ^ v2).yx<<ROTATE_16);v10=v10+v14+vec2(0u,u32(v10.x+v14.x<v10.x));v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=(v15 ^ v3).yx;v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)>>ROTATE_24)|((v7 ^ v11).yx<<ROTATE_8);v3=v3+v7+vec2(0u,u32(v3.x+v7.x<v3.x));v15=((v15 ^ v3)>>ROTATE_16)|((v15 ^ v3).yx<<ROTATE_16);v11=v11+v15+vec2(0u,u32(v11.x+v15.x<v11.x));v7=((v7 ^ v11)<<ROTATE_1)|((v7 ^ v11).yx>>ROTATE_31);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v0=v0+m1+vec2(0u,u32(v0.x+m1.x<v0.x));v15=(v15 ^ v0).yx;v10=v10+v15+vec2(0u,u32(v10.x+v15.x<v10.x));v5=((v5 ^ v10)>>ROTATE_24)|((v5 ^ v10).yx<<ROTATE_8);v0=v0+v5+vec2(0u,u32(v0.x+v5.x<v0.x));v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=(v13 ^ v2).yx;v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));v7=((v7 ^ v8)>>ROTATE_24)|((v7 ^ v8).yx<<ROTATE_8);v2=v2+v7+vec2(0u,u32(v2.x+v7.x<v2.x));v13=((v13 ^ v2)>>ROTATE_16)|((v13 ^ v2).yx<<ROTATE_16);v8=v8+v13+vec2(0u,u32(v8.x+v13.x<v8.x));if((BLAKE2B_IV32_0.y ^ v0.y ^ v8.y)>threshold){let wasFound:u32=atomicExchange(&work.found,1u);if(wasFound==0u){work.nonce=m0;}}return;}";
-
-// src/shaders/gl-fragment.ts
-var NanoPowGlFragmentShader = `#version 300 es
-#pragma vscode_glsllint_stage: frag
-precision highp float;
-
-in vec2 uv_pos;
-out uvec4 nonce;
-
-// blockhash - array of precalculated block hash components
-// threshold - 0xfffffff8 for send/change blocks, 0xfffffe00 for all else
-// workload - Defines canvas size
-layout(std140) uniform UBO {
- uint blockhash[8];
- uint threshold;
- float workload;
-};
-
-// Random work values
-layout(std140) uniform WORK {
- uvec2 work;
-};
-
-// Defined separately from uint v[32] below as the original value is required
-// to calculate the second uint32 of the digest for threshold comparison
-const uint BLAKE2B_IV32_1 = 0x6A09E667u;
-
-// Used during G for vector bit rotations
-const uvec2 ROTATE_1 = uvec2(1u, 1u);
-const uvec2 ROTATE_8 = uvec2(8u, 8u);
-const uvec2 ROTATE_16 = uvec2(16u, 16u);
-const uvec2 ROTATE_24 = uvec2(24u, 24u);
-const uvec2 ROTATE_31 = uvec2(31u, 31u);
-
-// Both buffers represent 16 uint64s as 32 uint32s
-// because that's what GLSL offers, just like Javascript
-
-// Compression buffer, intialized to 2 instances of the initialization vector
-// The following values have been modified from the BLAKE2B_IV:
-// OUTLEN is constant 8 bytes
-// v[0] ^= 0x01010000u ^ uint(OUTLEN);
-// INLEN is constant 40 bytes: work value (8) + block hash (32)
-// v[24] ^= uint(INLEN);
-// It's always the "last" compression at this INLEN
-// v[28] = ~v[28];
-// v[29] = ~v[29];
-uvec2 v[16] = uvec2[16](
- uvec2(0xF2BDC900u, 0x6A09E667u),
- uvec2(0x84CAA73Bu, 0xBB67AE85u),
- uvec2(0xFE94F82Bu, 0x3C6EF372u),
- uvec2(0x5F1D36F1u, 0xA54FF53Au),
- uvec2(0xADE682D1u, 0x510E527Fu),
- uvec2(0x2B3E6C1Fu, 0x9B05688Cu),
- uvec2(0xFB41BD6Bu, 0x1F83D9ABu),
- uvec2(0x137E2179u, 0x5BE0CD19u),
- uvec2(0xF3BCC908u, 0x6A09E667u),
- uvec2(0x84CAA73Bu, 0xBB67AE85u),
- uvec2(0xFE94F82Bu, 0x3C6EF372u),
- uvec2(0x5F1D36F1u, 0xA54FF53Au),
- uvec2(0xADE682F9u, 0x510E527Fu),
- uvec2(0x2B3E6C1Fu, 0x9B05688Cu),
- uvec2(0x04BE4294u, 0xE07C2654u),
- uvec2(0x137E2179u, 0x5BE0CD19u)
-);
-
-// Input data buffer
-uvec2 m[16];
-
-// Offsets into the input data buffer for each mixing step
-const uint SIGMA[192] = uint[192](
- 0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u,
- 14u,10u,4u,8u,9u,15u,13u,6u,1u,12u,0u,2u,11u,7u,5u,3u,
- 11u,8u,12u,0u,5u,2u,15u,13u,10u,14u,3u,6u,7u,1u,9u,4u,
- 7u,9u,3u,1u,13u,12u,11u,14u,2u,6u,5u,10u,4u,0u,15u,8u,
- 9u,0u,5u,7u,2u,4u,10u,15u,14u,1u,11u,12u,6u,8u,3u,13u,
- 2u,12u,6u,10u,0u,11u,8u,3u,4u,13u,7u,5u,15u,14u,1u,9u,
- 12u,5u,1u,15u,14u,13u,4u,10u,0u,7u,6u,3u,9u,2u,8u,11u,
- 13u,11u,7u,14u,12u,1u,3u,9u,5u,0u,15u,4u,8u,6u,2u,10u,
- 6u,15u,14u,9u,11u,3u,0u,8u,12u,2u,13u,7u,1u,4u,10u,5u,
- 10u,2u,8u,4u,7u,6u,1u,5u,15u,11u,9u,14u,3u,12u,13u,0u,
- 0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u,
- 14u,10u,4u,8u,9u,15u,13u,6u,1u,12u,0u,2u,11u,7u,5u,3u
-);
-
-// G mixing function
-void G (uint a, uint b, uint c, uint d, uint x, uint y) {
- v[a] = v[a] + v[b] + uvec2(0u, uint(v[a].x + v[b].x < v[b].x));
- v[a] = v[a] + m[x] + uvec2(0u, uint(v[a].x + m[x].x < m[x].x));
- v[d] = (v[d] ^ v[a]).yx;
- v[c] = v[c] + v[d] + uvec2(0u, uint(v[c].x + v[d].x < v[d].x));
- v[b] = ((v[b] ^ v[c]) >> ROTATE_24) | ((v[b] ^ v[c]).yx << ROTATE_8);
- v[a] = v[a] + v[b] + uvec2(0u, uint(v[a].x + v[b].x < v[b].x));
- v[a] = v[a] + m[y] + uvec2(0u, uint(v[a].x + m[y].x < m[y].x));
- v[d] = ((v[d] ^ v[a]) >> ROTATE_16) | ((v[d] ^ v[a]).yx << ROTATE_16);
- v[c] = v[c] + v[d] + uvec2(0u, uint(v[c].x + v[d].x < v[d].x));
- v[b] = ((v[b] ^ v[c]).yx >> ROTATE_31) | ((v[b] ^ v[c]) << ROTATE_1);
-}
-
-void main() {
- // Nonce uniquely differentiated by pixel location
- m[0u].x = work.x ^ uint(uv_pos.x * workload);
- m[0u].y = work.y ^ uint(uv_pos.y * workload);
-
- // Block hash
- m[1u] = uvec2(blockhash[0u], blockhash[1u]);
- m[2u] = uvec2(blockhash[2u], blockhash[3u]);
- m[3u] = uvec2(blockhash[4u], blockhash[5u]);
- m[4u] = uvec2(blockhash[6u], blockhash[7u]);
-
- // twelve rounds of mixing
- for(uint i = 0u; i < 12u; i = i + 1u) {
- G(0u, 4u, 8u, 12u, SIGMA[i * 16u + 0u], SIGMA[i * 16u + 1u]);
- G(1u, 5u, 9u, 13u, SIGMA[i * 16u + 2u], SIGMA[i * 16u + 3u]);
- G(2u, 6u, 10u, 14u, SIGMA[i * 16u + 4u], SIGMA[i * 16u + 5u]);
- G(3u, 7u, 11u, 15u, SIGMA[i * 16u + 6u], SIGMA[i * 16u + 7u]);
- G(0u, 5u, 10u, 15u, SIGMA[i * 16u + 8u], SIGMA[i * 16u + 9u]);
- G(1u, 6u, 11u, 12u, SIGMA[i * 16u + 10u], SIGMA[i * 16u + 11u]);
- G(2u, 7u, 8u, 13u, SIGMA[i * 16u + 12u], SIGMA[i * 16u + 13u]);
- G(3u, 4u, 9u, 14u, SIGMA[i * 16u + 14u], SIGMA[i * 16u + 15u]);
- }
-
- // Pixel data set from work values
- // Finalize digest from high bits, low bits can be safely ignored
- if ((BLAKE2B_IV32_1 ^ v[0u].y ^ v[8u].y) > threshold) {
- nonce = uvec4(1u, m[0].y, m[0].x, 1u);
- } else {
- discard;
- }
-}
-`;
-
-// src/shaders/gl-vertex.ts
-var NanoPowGlVertexShader = `#version 300 es
-#pragma vscode_glsllint_stage: vert
-precision highp float;
-layout (location=0) in vec4 position;
-layout (location=1) in vec2 uv;
-
-out vec2 uv_pos;
-
-void main() {
- uv_pos = uv;
- gl_Position = position;
-}
-`;
-
-// src/classes/gl.ts
-var NanoPowGl = class _NanoPowGl {
- static #busy = false;
- /** Used to set canvas size. Must be a multiple of 256. */
- static #WORKLOAD = 256 * Math.max(1, Math.floor(navigator.hardwareConcurrency));
- static #gl;
- static #program;
- static #vertexShader;
- static #fragmentShader;
- static #texture;
- static #framebuffer;
- static #positionBuffer;
- static #uvBuffer;
- static #uboBuffer;
- static #workBuffer;
- static #query;
- static #pixels;
- /**Vertex Positions, 2 triangles */
- static #positions = new Float32Array([
- -1,
- -1,
- 0,
- -1,
- 1,
- 0,
- 1,
- 1,
- 0,
- 1,
- -1,
- 0,
- 1,
- 1,
- 0,
- -1,
- -1,
- 0
- ]);
- /** Texture Positions */
- static #uvPosArray = new Float32Array([
- 1,
- 1,
- 1,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 1,
- 1
- ]);
- /** Compile */
- static async init() {
- if (this.#busy) return;
- this.#busy = true;
- try {
- this.#gl = new OffscreenCanvas(this.#WORKLOAD, this.#WORKLOAD).getContext("webgl2");
- if (this.#gl == null) throw new Error("WebGL 2 is required");
- this.#gl.clearColor(0, 0, 0, 1);
- this.#program = this.#gl.createProgram();
- if (this.#program == null) throw new Error("Failed to create shader program");
- this.#vertexShader = this.#gl.createShader(this.#gl.VERTEX_SHADER);
- if (this.#vertexShader == null) throw new Error("Failed to create vertex shader");
- this.#gl.shaderSource(this.#vertexShader, NanoPowGlVertexShader);
- this.#gl.compileShader(this.#vertexShader);
- if (!this.#gl.getShaderParameter(this.#vertexShader, this.#gl.COMPILE_STATUS))
- throw new Error(this.#gl.getShaderInfoLog(this.#vertexShader) ?? `Failed to compile vertex shader`);
- this.#fragmentShader = this.#gl.createShader(this.#gl.FRAGMENT_SHADER);
- if (this.#fragmentShader == null) throw new Error("Failed to create fragment shader");
- this.#gl.shaderSource(this.#fragmentShader, NanoPowGlFragmentShader);
- this.#gl.compileShader(this.#fragmentShader);
- if (!this.#gl.getShaderParameter(this.#fragmentShader, this.#gl.COMPILE_STATUS))
- throw new Error(this.#gl.getShaderInfoLog(this.#fragmentShader) ?? `Failed to compile fragment shader`);
- this.#gl.attachShader(this.#program, this.#vertexShader);
- this.#gl.attachShader(this.#program, this.#fragmentShader);
- this.#gl.linkProgram(this.#program);
- if (!this.#gl.getProgramParameter(this.#program, this.#gl.LINK_STATUS))
- throw new Error(this.#gl.getProgramInfoLog(this.#program) ?? `Failed to link program`);
- this.#gl.useProgram(this.#program);
- const triangleArray = this.#gl.createVertexArray();
- this.#gl.bindVertexArray(triangleArray);
- this.#texture = this.#gl.createTexture();
- this.#gl.bindTexture(this.#gl.TEXTURE_2D, this.#texture);
- this.#gl.texImage2D(this.#gl.TEXTURE_2D, 0, this.#gl.RGBA32UI, this.#gl.drawingBufferWidth, this.#gl.drawingBufferHeight, 0, this.#gl.RGBA_INTEGER, this.#gl.UNSIGNED_INT, null);
- this.#gl.texParameteri(this.#gl.TEXTURE_2D, this.#gl.TEXTURE_MIN_FILTER, this.#gl.NEAREST);
- this.#gl.texParameteri(this.#gl.TEXTURE_2D, this.#gl.TEXTURE_MAG_FILTER, this.#gl.NEAREST);
- this.#gl.bindTexture(this.#gl.TEXTURE_2D, null);
- this.#framebuffer = this.#gl.createFramebuffer();
- this.#gl.bindFramebuffer(this.#gl.FRAMEBUFFER, this.#framebuffer);
- this.#gl.framebufferTexture2D(this.#gl.FRAMEBUFFER, this.#gl.COLOR_ATTACHMENT0, this.#gl.TEXTURE_2D, this.#texture, 0);
- if (this.#gl.checkFramebufferStatus(this.#gl.FRAMEBUFFER) !== this.#gl.FRAMEBUFFER_COMPLETE)
- throw new Error(`Failed to create framebuffer`);
- this.#gl.bindFramebuffer(this.#gl.FRAMEBUFFER, null);
- this.#positionBuffer = this.#gl.createBuffer();
- this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, this.#positionBuffer);
- this.#gl.bufferData(this.#gl.ARRAY_BUFFER, this.#positions, this.#gl.STATIC_DRAW);
- this.#gl.vertexAttribPointer(0, 3, this.#gl.FLOAT, false, 0, 0);
- this.#gl.enableVertexAttribArray(0);
- this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, null);
- this.#uvBuffer = this.#gl.createBuffer();
- this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, this.#uvBuffer);
- this.#gl.bufferData(this.#gl.ARRAY_BUFFER, this.#uvPosArray, this.#gl.STATIC_DRAW);
- this.#gl.vertexAttribPointer(1, 2, this.#gl.FLOAT, false, 0, 0);
- this.#gl.enableVertexAttribArray(1);
- this.#gl.bindBuffer(this.#gl.ARRAY_BUFFER, null);
- this.#uboBuffer = this.#gl.createBuffer();
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#uboBuffer);
- this.#gl.bufferData(this.#gl.UNIFORM_BUFFER, 144, this.#gl.DYNAMIC_DRAW);
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null);
- this.#gl.bindBufferBase(this.#gl.UNIFORM_BUFFER, 0, this.#uboBuffer);
- this.#gl.uniformBlockBinding(this.#program, this.#gl.getUniformBlockIndex(this.#program, "UBO"), 0);
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null);
- this.#workBuffer = this.#gl.createBuffer();
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#workBuffer);
- this.#gl.bufferData(this.#gl.UNIFORM_BUFFER, 32, this.#gl.STREAM_DRAW);
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null);
- this.#gl.bindBufferBase(this.#gl.UNIFORM_BUFFER, 1, this.#workBuffer);
- this.#gl.uniformBlockBinding(this.#program, this.#gl.getUniformBlockIndex(this.#program, "WORK"), 1);
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null);
- this.#pixels = new Uint32Array(this.#gl.drawingBufferWidth * this.#gl.drawingBufferHeight * 4);
- this.#query = this.#gl.createQuery();
- } catch (err) {
- throw new Error(`WebGL initialization failed. ${err}`);
- } finally {
- this.#busy = false;
- }
- }
- static reset() {
- _NanoPowGl.#query = null;
- _NanoPowGl.#workBuffer = null;
- _NanoPowGl.#uboBuffer = null;
- _NanoPowGl.#uvBuffer = null;
- _NanoPowGl.#positionBuffer = null;
- _NanoPowGl.#framebuffer = null;
- _NanoPowGl.#texture = null;
- _NanoPowGl.#fragmentShader = null;
- _NanoPowGl.#vertexShader = null;
- _NanoPowGl.#program = null;
- _NanoPowGl.#gl = null;
- _NanoPowGl.#busy = false;
- _NanoPowGl.init();
- }
- static #logAverages(times) {
- let count = times.length, sum = 0, reciprocals = 0, logarithms = 0, truncated = 0, min = 65535, max = 0, rate = 0;
- times.sort();
- for (let i = 0; i < count; i++) {
- sum += times[i];
- reciprocals += 1 / times[i];
- logarithms += Math.log(times[i]);
- min = Math.min(min, times[i]);
- max = Math.max(max, times[i]);
- if (count < 3 || i > count * 0.1 && i < count * 0.9) truncated += times[i];
- }
- const averages = {
- "Count (frames)": count,
- "Total (ms)": sum,
- "Rate (f/s)": 1e3 * count * 0.8 / (truncated || sum),
- "Minimum (ms)": min,
- "Maximum (ms)": max,
- "Arithmetic Mean (ms)": sum / count,
- "Truncated Mean (ms)": truncated / count,
- "Harmonic Mean (ms)": count / reciprocals,
- "Geometric Mean (ms)": Math.exp(logarithms / count)
- };
- console.table(averages);
- }
- static #draw(work) {
- if (this.#gl == null || this.#query == null) throw new Error("WebGL 2 is required to draw and query pixels");
- if (this.#workBuffer == null) throw new Error("Work buffer is required to draw");
- this.#gl.clear(this.#gl.COLOR_BUFFER_BIT);
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, this.#workBuffer);
- this.#gl.bufferSubData(this.#gl.UNIFORM_BUFFER, 0, work);
- this.#gl.bindBuffer(this.#gl.UNIFORM_BUFFER, null);
- this.#gl.beginQuery(this.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE, this.#query);
- this.#gl.bindFramebuffer(this.#gl.FRAMEBUFFER, this.#framebuffer);
- this.#gl.drawArrays(this.#gl.TRIANGLES, 0, 6);
- this.#gl.bindFramebuffer(this.#gl.FRAMEBUFFER, null);
- this.#gl.endQuery(this.#gl.ANY_SAMPLES_PASSED_CONSERVATIVE);
- }
- static async #checkQueryResult() {
- return new Promise((resolve, reject) => {
- try {
- if (this.#gl == null || this.#query == null) throw new Error("WebGL 2 is required to check query results");
- if (this.#gl.getQueryParameter(this.#query, this.#gl.QUERY_RESULT_AVAILABLE)) {
- resolve(!!this.#gl.getQueryParameter(this.#query, this.#gl.QUERY_RESULT));
- } else {
- requestAnimationFrame(async () => {
- const result = await _NanoPowGl.#checkQueryResult();
- resolve(result);
- });
- }
- } catch (err) {
- reject(err);
- }
- });
- }
- /**
- * Reads pixels into the work buffer, checks every 4th pixel for the 'found'
- * byte, converts the subsequent 3 pixels with the nonce byte values to a hex
- * string, and returns the result.
- *
- * @param workHex - Original nonce if provided for a validation call
- * @returns Nonce as an 8-byte (16-char) hexadecimal string
- */
- static #readResult(workHex) {
- if (this.#gl == null) throw new Error("WebGL 2 is required to read pixels");
- this.#gl.bindFramebuffer(this.#gl.FRAMEBUFFER, this.#framebuffer);
- this.#gl.readPixels(0, 0, this.#gl.drawingBufferWidth, this.#gl.drawingBufferHeight, this.#gl.RGBA_INTEGER, this.#gl.UNSIGNED_INT, this.#pixels);
- this.#gl.bindFramebuffer(this.#gl.FRAMEBUFFER, null);
- for (let i = 0; i < this.#pixels.length; i += 4) {
- if (this.#pixels[i] !== 0) {
- const hex = `${this.#pixels[i + 1].toString(16).padStart(8, "0")}${this.#pixels[i + 2].toString(16).padStart(8, "0")}`;
- if (workHex == null || workHex == hex) return hex;
- }
- }
- throw new Error("Query reported result but nonce value not found");
- }
- /**
- * Finds a nonce that satisfies the Nano proof-of-work requirements.
- *
- * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
- * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
- */
- static async search(hash, options) {
- if (_NanoPowGl.#gl == null) throw new Error("WebGL 2 is required");
- if (this.#gl == null) throw new Error("WebGL 2 is required");
- if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new Error(`Invalid hash ${hash}`);
- if (this.#busy) {
- return new Promise((resolve) => {
- setTimeout(async () => {
- const result = this.search(hash, options);
- resolve(result);
- }, 100);
- });
- }
- this.#busy = true;
- const threshold = typeof options?.threshold !== "number" || options.threshold < 0 || options.threshold > 4294967295 ? 4294967288 : options.threshold;
- const effort = typeof options?.effort !== "number" || options.effort < 1 || options.effort > 32 ? 8 : options.effort;
- const debug = !!options?.debug;
- if (this.#WORKLOAD !== 256 * effort) {
- this.#WORKLOAD = 256 * effort;
- this.reset();
- }
- const uboView = new DataView(new ArrayBuffer(144));
- for (let i = 0; i < 64; i += 8) {
- const uint32 = hash.slice(i, i + 8);
- uboView.setUint32(i * 2, parseInt(uint32, 16));
- }
- uboView.setUint32(128, threshold, true);
- uboView.setFloat32(132, 256 * effort, true);
- _NanoPowGl.#gl.bindBuffer(_NanoPowGl.#gl.UNIFORM_BUFFER, _NanoPowGl.#uboBuffer);
- _NanoPowGl.#gl.bufferSubData(_NanoPowGl.#gl.UNIFORM_BUFFER, 0, uboView);
- _NanoPowGl.#gl.bindBuffer(_NanoPowGl.#gl.UNIFORM_BUFFER, null);
- let times = [];
- let start = performance.now();
- let nonce = null;
- const seed = new Uint8Array(8);
- while (nonce == null) {
- start = performance.now();
- crypto.getRandomValues(seed);
- this.#draw(seed);
- const found = await this.#checkQueryResult();
- times.push(performance.now() - start);
- if (found) {
- nonce = this.#readResult();
- }
- }
- this.#busy = false;
- if (debug) this.#logAverages(times);
- return nonce;
- }
- /**
- * Validates that a nonce satisfies Nano proof-of-work requirements.
- *
- * @param {string} work - Hexadecimal proof-of-work value to validate
- * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
- * @param {number} [threshold=0xfffffff8] - Difficulty of proof-of-work calculation
- */
- static async validate(work, hash, options) {
- if (_NanoPowGl.#gl == null) throw new Error("WebGL 2 is required");
- if (this.#gl == null) throw new Error("WebGL 2 is required");
- if (!/^[A-Fa-f0-9]{16}$/.test(work)) throw new Error(`Invalid work ${work}`);
- if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new Error(`Invalid hash ${hash}`);
- if (this.#busy) {
- return new Promise((resolve) => {
- setTimeout(async () => {
- const result = this.validate(work, hash, options);
- resolve(result);
- }, 100);
- });
- }
- this.#busy = true;
- const threshold = typeof options?.threshold !== "number" || options.threshold < 0 || options.threshold > 4294967295 ? 4294967288 : options.threshold;
- const debug = !!options?.debug;
- if (this.#WORKLOAD !== 1) {
- this.#WORKLOAD = 1;
- this.reset();
- }
- const uboView = new DataView(new ArrayBuffer(144));
- for (let i = 0; i < 64; i += 8) {
- const uint32 = hash.slice(i, i + 8);
- uboView.setUint32(i * 2, parseInt(uint32, 16));
- }
- uboView.setUint32(128, threshold, true);
- uboView.setFloat32(132, _NanoPowGl.#WORKLOAD - 1, true);
- _NanoPowGl.#gl.bindBuffer(_NanoPowGl.#gl.UNIFORM_BUFFER, _NanoPowGl.#uboBuffer);
- _NanoPowGl.#gl.bufferSubData(_NanoPowGl.#gl.UNIFORM_BUFFER, 0, uboView);
- _NanoPowGl.#gl.bindBuffer(_NanoPowGl.#gl.UNIFORM_BUFFER, null);
- let nonce = null;
- const data = new DataView(new ArrayBuffer(8));
- data.setBigUint64(0, BigInt(`0x${work}`), true);
- const seed = new Uint8Array(data.buffer);
- this.#draw(seed);
- let found = await this.#checkQueryResult();
- if (found) {
- try {
- nonce = this.#readResult(work);
- } catch (err) {
- found = false;
- }
- }
- this.#busy = false;
- if (found && nonce !== work) throw new Error(`Nonce found but does not match work`);
- return found;
- }
-};
-
-// src/classes/gpu.ts
-var NanoPowGpu = class _NanoPowGpu {
- // Initialize WebGPU
- static #busy = false;
- static #device = null;
- static #uboBuffer;
- static #gpuBuffer;
- static #cpuBuffer;
- static #bindGroupLayout;
- static #searchPipeline;
- static #validatePipeline;
- // Initialize WebGPU
- static async init() {
- if (this.#busy) return;
- this.#busy = true;
- try {
- if (navigator.gpu == null) throw new Error("WebGPU is not supported in this browser.");
- const adapter = await navigator.gpu.requestAdapter();
- if (adapter == null) throw new Error("WebGPU adapter refused by browser.");
- const device = await adapter.requestDevice();
- if (!(device instanceof GPUDevice)) throw new Error("WebGPU device failed to load.");
- device.lost.then(this.reset);
- this.#device = device;
- this.setup();
- } catch (err) {
- throw new Error(`WebGPU initialization failed. ${err}`);
- } finally {
- this.#busy = false;
- }
- }
- static setup() {
- if (this.#device == null) throw new Error(`WebGPU device failed to load.`);
- this.#uboBuffer = this.#device.createBuffer({
- size: 48,
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
- });
- this.#gpuBuffer = this.#device.createBuffer({
- size: 16,
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
- });
- this.#cpuBuffer = this.#device.createBuffer({
- size: 16,
- usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
- });
- this.#bindGroupLayout = this.#device.createBindGroupLayout({
- entries: [
- {
- binding: 0,
- visibility: GPUShaderStage.COMPUTE,
- buffer: { type: "uniform" }
- },
- {
- binding: 1,
- visibility: GPUShaderStage.COMPUTE,
- buffer: { type: "storage" }
- }
- ]
- });
- const shaderModule = this.#device.createShaderModule({
- code: compute_default
- });
- this.#searchPipeline = this.#device.createComputePipeline({
- layout: this.#device.createPipelineLayout({
- bindGroupLayouts: [this.#bindGroupLayout]
- }),
- compute: {
- entryPoint: "search",
- module: shaderModule
- }
- });
- this.#validatePipeline = this.#device.createComputePipeline({
- layout: this.#device.createPipelineLayout({
- bindGroupLayouts: [this.#bindGroupLayout]
- }),
- compute: {
- entryPoint: "validate",
- module: shaderModule
- }
- });
- }
- static reset() {
- console.warn(`GPU device lost. Reinitializing...`);
- _NanoPowGpu.#cpuBuffer?.destroy();
- _NanoPowGpu.#gpuBuffer?.destroy();
- _NanoPowGpu.#uboBuffer?.destroy();
- _NanoPowGpu.#busy = false;
- _NanoPowGpu.init();
- }
- static #logAverages(times) {
- let count = times.length, truncatedCount = 0, truncated = 0, sum = 0, reciprocals = 0, logarithms = 0, min = Number.MAX_SAFE_INTEGER, max = 0, median = 0, rate = 0;
- times.sort();
- for (let i = 0; i < count; i++) {
- sum += times[i];
- reciprocals += 1 / times[i];
- logarithms += Math.log(times[i]);
- min = Math.min(min, times[i]);
- max = Math.max(max, times[i]);
- if (i === Math.ceil(count / 2)) {
- median = times[i];
- }
- if (count < 3 || i > 0.1 * count && i < 0.9 * (count - 1)) {
- truncated += times[i];
- truncatedCount++;
- }
- }
- const averages = {
- "Count (dispatches)": count,
- "Total (ms)": sum,
- "Rate (d/s)": 1e3 * truncatedCount / (truncated || sum),
- "Minimum (ms)": min,
- "Maximum (ms)": max,
- "Median (ms)": median,
- "Arithmetic Mean (ms)": sum / count,
- "Truncated Mean (ms)": truncated / truncatedCount,
- "Harmonic Mean (ms)": count / reciprocals,
- "Geometric Mean (ms)": Math.exp(logarithms / count)
- };
- console.table(averages);
- }
- static async #dispatch(pipeline, seed, hash, threshold, passes) {
- if (this.#device == null) throw new Error(`WebGPU device failed to load.`);
- const uboView = new DataView(new ArrayBuffer(48));
- for (let i = 0; i < 64; i += 16) {
- const u64 = hash.slice(i, i + 16);
- uboView.setBigUint64(i / 2, BigInt(`0x${u64}`));
- }
- uboView.setBigUint64(32, seed, true);
- uboView.setUint32(40, threshold, true);
- this.#device.queue.writeBuffer(this.#uboBuffer, 0, uboView);
- this.#device.queue.writeBuffer(this.#gpuBuffer, 0, new Uint32Array([0, 0, 0]));
- const bindGroup = this.#device.createBindGroup({
- layout: this.#bindGroupLayout,
- entries: [
- {
- binding: 0,
- resource: {
- buffer: this.#uboBuffer
- }
- },
- {
- binding: 1,
- resource: {
- buffer: this.#gpuBuffer
- }
- }
- ]
- });
- const commandEncoder = this.#device.createCommandEncoder();
- const passEncoder = commandEncoder.beginComputePass();
- passEncoder.setPipeline(pipeline);
- passEncoder.setBindGroup(0, bindGroup);
- passEncoder.dispatchWorkgroups(passes, passes);
- passEncoder.end();
- commandEncoder.copyBufferToBuffer(this.#gpuBuffer, 0, this.#cpuBuffer, 0, 12);
- this.#device.queue.submit([commandEncoder.finish()]);
- let data = null;
- try {
- await this.#cpuBuffer.mapAsync(GPUMapMode.READ);
- await this.#device.queue.onSubmittedWorkDone();
- data = new DataView(this.#cpuBuffer.getMappedRange().slice(0));
- this.#cpuBuffer.unmap();
- } catch (err) {
- console.warn(`Error getting data from GPU. ${err}`);
- return this.#dispatch(pipeline, seed, hash, threshold, passes);
- }
- if (data == null) throw new Error(`Failed to get data from buffer.`);
- return data;
- }
- /**
- * Finds a nonce that satisfies the Nano proof-of-work requirements.
- *
- * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
- * @param {NanoPowOptions} options - Used to configure search execution
- */
- static async search(hash, options) {
- if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new TypeError(`Invalid hash ${hash}`);
- if (this.#busy) {
- return new Promise((resolve) => {
- setTimeout(async () => {
- const result = this.search(hash, options);
- resolve(result);
- }, 100);
- });
- }
- this.#busy = true;
- const threshold = typeof options?.threshold !== "number" || options.threshold < 0 || options.threshold > 4294967295 ? 4294967288 : options.threshold;
- const effort = typeof options?.effort !== "number" || options.effort < 1 || options.effort > 32 ? 2048 : options.effort * 256;
- const debug = !!options?.debug;
- let loads = 0;
- while (this.#device == null && loads < 20) {
- await new Promise((resolve) => {
- setTimeout(resolve, 500);
- });
- }
- if (this.#device == null) {
- this.#busy = false;
- throw new Error(`WebGPU device failed to load.`);
- }
- let times = [];
- let start = performance.now();
- let nonce = 0n;
- do {
- start = performance.now();
- const random = Math.floor(Math.random() * 4294967295);
- const seed = BigInt(random) << 32n | BigInt(random);
- const data = await this.#dispatch(this.#searchPipeline, seed, hash, threshold, effort);
- nonce = data.getBigUint64(0, true);
- this.#busy = !data.getUint32(8);
- times.push(performance.now() - start);
- } while (this.#busy);
- if (debug) this.#logAverages(times);
- return nonce.toString(16).padStart(16, "0");
- }
- /**
- * Validates that a nonce satisfies Nano proof-of-work requirements.
- *
- * @param {string} work - Hexadecimal proof-of-work value to validate
- * @param {string} hash - Hexadecimal hash of previous block, or public key for new accounts
- * @param {NanoPowOptions} options - Options used to configure search execution
- */
- static async validate(work, hash, options) {
- if (!/^[A-Fa-f0-9]{16}$/.test(work)) throw new TypeError(`Invalid work ${work}`);
- if (!/^[A-Fa-f0-9]{64}$/.test(hash)) throw new TypeError(`Invalid hash ${hash}`);
- if (this.#busy) {
- return new Promise((resolve) => {
- setTimeout(async () => {
- const result = this.validate(work, hash, options);
- resolve(result);
- }, 100);
- });
- }
- this.#busy = true;
- const debug = !!options?.debug;
- const threshold = typeof options?.threshold !== "number" || options.threshold < 0 || options.threshold > 4294967295 ? 4294967288 : options.threshold;
- let loads = 0;
- while (this.#device == null && loads < 20) {
- await new Promise((resolve) => {
- setTimeout(resolve, 500);
- });
- }
- if (this.#device == null) {
- this.#busy = false;
- throw new Error(`WebGPU device failed to load.`);
- }
- const seed = BigInt(`0x${work}`);
- const data = await this.#dispatch(this.#validatePipeline, seed, hash, threshold, 1);
- const nonce = data.getBigUint64(0, true).toString(16).padStart(16, "0");
- const found = !!data.getUint32(8);
- this.#busy = false;
- if (found && work !== nonce) throw new Error(`Nonce (${nonce}) found but does not match work (${work})`);
- return found;
- }
-};
-
-// src/classes/index.ts
-var isGlSupported;
-var isGpuSupported = false;
-try {
- await NanoPowGpu.init();
- isGpuSupported = true;
-} catch (err) {
- console.warn(`WebGPU is not supported in this environment.`);
- isGpuSupported = false;
-}
-try {
- await NanoPowGl.init();
- isGlSupported = true;
-} catch (err) {
- console.error(err);
- console.warn(`WebGL is not supported in this environment.`);
- isGlSupported = false;
-}
-var NanoPow = isGpuSupported ? NanoPowGpu : isGlSupported ? NanoPowGl : null;
-
-// src/main.ts
-var main_default = NanoPow;
-export {
- NanoPow,
- NanoPowGl,
- NanoPowGpu,
- main_default as default
-};
// SPDX-FileCopyrightText: 2025 Chris Duncan <chris@zoso.dev>
// SPDX-License-Identifier: GPL-3.0-or-later
+/**
+* Input buffers
+*/
struct UBO {
blockhash: array<vec4<u32>, 2>,
random: vec2<u32>,
};
@group(0) @binding(0) var<uniform> ubo: UBO;
+/**
+* Output buffers
+*/
struct WORK {
nonce: vec2<u32>,
found: atomic<u32>
};
@group(0) @binding(1) var<storage, read_write> work: WORK;
-/**
-* Shared flag to prevent execution for all workgroup threads based on the
-* atomicLoad() result of a single member thread.
-*/
-var<workgroup> found: bool;
-
/**
* Defined separately from `v0` because the original value is required to
* calculate the digest and compare it to the threshold.
/**
* Used to rotate bits by a fixed amount during G mixing.
*/
-const ROTATE_1 = vec2(1u, 1u);
-const ROTATE_8 = vec2(8u, 8u);
-const ROTATE_16 = vec2(16u, 16u);
-const ROTATE_24 = vec2(24u, 24u);
-const ROTATE_31 = vec2(31u, 31u);
+const VEC2_0 = vec2(0u);
+const VEC4_0 = vec4(0u);
+const ROTATE_1 = vec4(1u);
+const ROTATE_8 = vec4(8u);
+const ROTATE_16 = vec4(16u);
+const ROTATE_24 = vec4(24u);
+const ROTATE_31 = vec4(31u);
+
+/**
+* Shared flag to prevent execution for all workgroup threads based on the
+* atomicLoad() result of a single member thread.
+*/
+var<workgroup> found: bool;
/**
* Search compute function
-* Calls main with a workgroup size of 64 which has been tested as optimal
+* Calls main with a workgroup size of 64 which is generally considered optimal
+* due to how warps and wavefronts are executed on modern GPUs. The entire
+* workgroup exits immediately if a nonce was already found by a previous
+* workgroup.
*/
@compute @workgroup_size(64)
fn search(@builtin(global_invocation_id) global_id: vec3<u32>, @builtin(local_invocation_id) local_id: vec3<u32>) {
* performance, but the variable name still contains the original index digit.
*/
fn main(id: vec3<u32>) {
- let threshold: u32 = ubo.threshold;
-
/**
* Initialize (nonce||blockhash) concatenation
*/
* It is always the "last" compression at this INLEN
* v14 = ~v14;
*/
- var v0: vec2<u32> = BLAKE2B_IV32_0;
- var v1: vec2<u32> = vec2(0x84CAA73Bu, 0xBB67AE85u);
- var v2: vec2<u32> = vec2(0xFE94F82Bu, 0x3C6EF372u);
- var v3: vec2<u32> = vec2(0x5F1D36F1u, 0xA54FF53Au);
- var v4: vec2<u32> = vec2(0xADE682D1u, 0x510E527Fu);
- var v5: vec2<u32> = vec2(0x2B3E6C1Fu, 0x9B05688Cu);
- var v6: vec2<u32> = vec2(0xFB41BD6Bu, 0x1F83D9ABu);
- var v7: vec2<u32> = vec2(0x137E2179u, 0x5BE0CD19u);
- var v8: vec2<u32> = vec2(0xF3BCC908u, 0x6A09E667u);
- var v9: vec2<u32> = vec2(0x84CAA73Bu, 0xBB67AE85u);
- var v10: vec2<u32> = vec2(0xFE94F82Bu, 0x3C6EF372u);
- var v11: vec2<u32> = vec2(0x5F1D36F1u, 0xA54FF53Au);
- var v12: vec2<u32> = vec2(0xADE682F9u, 0x510E527Fu);
- var v13: vec2<u32> = vec2(0x2B3E6C1Fu, 0x9B05688Cu);
- var v14: vec2<u32> = vec2(0x04BE4294u, 0xE07C2654u);
- var v15: vec2<u32> = vec2(0x137E2179u, 0x5BE0CD19u);
+ var v01: vec4<u32> = vec4(BLAKE2B_IV32_0, 0x84CAA73Bu, 0xBB67AE85u);
+ var v23: vec4<u32> = vec4(0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au);
+ var v45: vec4<u32> = vec4(0xADE682D1u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu);
+ var v67: vec4<u32> = vec4(0xFB41BD6Bu, 0x1F83D9ABu, 0x137E2179u, 0x5BE0CD19u);
+ var v89: vec4<u32> = vec4(0xF3BCC908u, 0x6A09E667u, 0x84CAA73Bu, 0xBB67AE85u);
+ var v1011: vec4<u32> = vec4(0xFE94F82Bu, 0x3C6EF372u, 0x5F1D36F1u, 0xA54FF53Au);
+ var v1213: vec4<u32> = vec4(0xADE682F9u, 0x510E527Fu, 0x2B3E6C1Fu, 0x9B05688Cu);
+ var v1415: vec4<u32> = vec4(0x04BE4294u, 0xE07C2654u, 0x137E2179u, 0x5BE0CD19u);
/**
- * Twelve rounds of G mixing as part of BLAKE2b compression step. Each round is
- * divided into eight subprocesses. Each subprocesses applies transformations
- * to `m` and `v` variables based on a defined set of index inputs. The
- * algorithm for each subprocess is defined as follows:
+ * Twelve rounds of G mixing as part of BLAKE2b compression step. Normally,
+ * each round is divided into eight subprocesses; NanoPow compresses these
+ * operations into four subprocesses by executing sequential pairs
+ * simultaneously, inspired by https://github.com/minio/blake2b-simd
+ *
+ * Each subprocess applies transformations to `m` and `v` variables based on
+ * a defined set of index inputs. The algorithm for each subprocess is defined
+ * as follows:
*
* r is the current round
* i is the current subprocess within that round
* Each sum step has an extra carry addition. Note that the m[sigma] sum is
* skipped if m[sigma] is zero since it effectively does nothing.
*/
-
+ var a: vec4<u32>;
+ var b: vec4<u32>;
+ var c: vec4<u32>;
+ var d: vec4<u32>;
+ var x: vec4<u32>;
+ var y: vec4<u32>;
/****************************************************************************
* ROUND(0) *
****************************************************************************/
/**
* r=0, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m0 + vec2(0u, u32(v0.x + m0.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=0, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ x = vec4(m0, m2);
+ y = vec4(m1, m3);
- /**
- * r=0, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=0, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=0, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(m4, VEC2_0);
+ // y = VEC4_0;
- /**
- * r=0, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=0, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=0, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=0, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=0, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=0, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ // x = VEC4_0;
+ // y = VEC4_0;
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=1, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=1, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ x = vec4(VEC2_0, m4);
+ // y = VEC4_0;
- /**
- * r=1, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=1, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=1, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=1, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=1, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=1, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v1 = v1 + m0 + vec2(0u, u32(v1.x + m0.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ x = vec4(m1, m0);
+ y = vec4(VEC2_0, m2);
- /**
- * r=1, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=1, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=1, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ // x = VEC4_0;
+ y = vec4(VEC2_0, m3);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=2, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=2, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m0 + vec2(0u, u32(v1.x + m0.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = VEC4_0;
+ y = vec4(VEC2_0, m0);
- /**
- * r=2, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=2, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=2, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ // x = VEC4_0;
+ y = vec4(m2, VEC2_0);
- /**
- * r=2, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=2, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=2, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ x = vec4(VEC2_0, m3);
+ // y = VEC4_0;
- /**
- * r=2, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=2, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=2, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v3 = v3 + m4 + vec2(0u, u32(v3.x + m4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ // x = VEC4_0;
+ y = vec4(m1, m4);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=3, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=3, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m1 + vec2(0u, u32(v1.x + m1.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = vec4(VEC2_0, m3); // still assigned this value from last assignment at r=2 i=4/5
+ y = vec4(VEC2_0, m1);
- /**
- * r=3, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=3, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=3, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=3, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=3, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=3, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ x = vec4(m2, VEC2_0);
+ // y = VEC4_0;
- /**
- * r=3, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m0 + vec2(0u, u32(v2.x + m0.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=3, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=3, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ x = vec4(m4, VEC2_0);
+ y = vec4(m0, VEC2_0);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=4, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m0 + vec2(0u, u32(v0.x + m0.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=4, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = VEC4_0;
+ // y = vec4(m0, VEC2_0); // still assigned this value from last assignment at r=3 i=6/7
- /**
- * r=4, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=4, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=4, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(m2, VEC2_0);
+ y = vec4(m4, VEC2_0);
- /**
- * r=4, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=4, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=4, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ // x = VEC4_0;
+ y = vec4(m1, VEC2_0);
- /**
- * r=4, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=4, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=4, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ x = vec4(VEC2_0, m3);
+ // y = VEC4_0;
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=5, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=5, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ x = vec4(m2, VEC2_0);
+ // y = VEC4_0;
- /**
- * r=5, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m0 + vec2(0u, u32(v2.x + m0.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=5, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=5, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(m0, VEC2_0);
+ y = vec4(VEC2_0, m3);
- /**
- * r=5, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m4 + vec2(0u, u32(v0.x + m4.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=5, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=5, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ x = vec4(m4, VEC2_0);
+ // y = VEC4_0;
- /**
- * r=5, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=5, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=5, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v3 = v3 + m1 + vec2(0u, u32(v3.x + m1.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ x = vec4(VEC2_0, m1);
+ // y = VEC4_0;
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=6, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=6, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m1 + vec2(0u, u32(v1.x + m1.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = vec4(VEC2_0, m1); // still assigned this value from last assignment at r=5 i=6/7
+ // y = VEC4_0;
- /**
- * r=6, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=6, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=6, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v3 = v3 + m4 + vec2(0u, u32(v3.x + m4.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(VEC2_0, m4);
+ // y = VEC4_0;
- /**
- * r=6, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m0 + vec2(0u, u32(v0.x + m0.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=6, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=6, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ x = vec4(m0, VEC2_0);
+ // y = vec4(VEC2_0, m3); // still assigned this value from last assignment at r=5 i=2/3
- /**
- * r=6, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m2 + vec2(0u, u32(v2.x + m2.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=6, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=6, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ // x = VEC4_0;
+ y = vec4(m2, VEC2_0);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=7, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=7, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=7, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=7, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=7, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v3 = v3 + m3 + vec2(0u, u32(v3.x + m3.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(VEC2_0, m3);
+ y = vec4(m1, VEC2_0);
- /**
- * r=7, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m0 + vec2(0u, u32(v0.x + m0.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=7, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=7, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ // x = VEC4_0;
+ y = vec4(m0, m4);
- /**
- * r=7, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=7, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=7, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v3 = v3 + m2 + vec2(0u, u32(v3.x + m2.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ x = vec4(VEC2_0, m2);
+ // y = VEC4_0;
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=8, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=8, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=8, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m3 + vec2(0u, u32(v2.x + m3.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=8, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=8, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v3 = v3 + m0 + vec2(0u, u32(v3.x + m0.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(VEC2_0, m0);
+ y = vec4(m3, VEC2_0);
- /**
- * r=8, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=8, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=8, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ // x = VEC4_0;
+ y = vec4(m2, VEC2_0);
- /**
- * r=8, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m1 + vec2(0u, u32(v2.x + m1.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=8, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=8, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ x = vec4(m1, VEC2_0);
+ y = vec4(m4, VEC2_0);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=9, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m2 + vec2(0u, u32(v0.x + m2.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=9, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ // x = VEC4_0;
+ y = vec4(m2, m4);
- /**
- * r=9, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=9, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=9, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v3 = v3 + m1 + vec2(0u, u32(v3.x + m1.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(VEC2_0, m1);
+ // y = VEC4_0;
- /**
- * r=9, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=9, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=9, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=9, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v2 = v2 + m3 + vec2(0u, u32(v2.x + m3.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=9, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=9, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v3 = v3 + m0 + vec2(0u, u32(v3.x + m0.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ x = vec4(m3, VEC2_0);
+ y = vec4(VEC2_0, m0);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=10, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m0 + vec2(0u, u32(v0.x + m0.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) << ROTATE_1) | ((v4 ^ v8).yx >> ROTATE_31);
-
- /**
* r=10, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m2 + vec2(0u, u32(v1.x + m2.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m3 + vec2(0u, u32(v1.x + m3.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ x = vec4(m0, m2);
+ y = vec4(m1, m3);
- /**
- * r=10, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v2 = v2 + m4 + vec2(0u, u32(v2.x + m4.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) << ROTATE_1) | ((v6 ^ v10).yx >> ROTATE_31);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=10, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=10, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ x = vec4(m4, VEC2_0);
+ // y = VEC4_0;
- /**
- * r=10, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v15 = ((v15 ^ v0) >> ROTATE_16) | ((v15 ^ v0).yx << ROTATE_16);
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) << ROTATE_1) | ((v5 ^ v10).yx >> ROTATE_31);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=10, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=10, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = (v12 ^ v1).yx;
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) >> ROTATE_24) | ((v6 ^ v11).yx << ROTATE_8);
- v1 = v1 + v6 + vec2(0u, u32(v1.x + v6.x < v1.x));
- v12 = ((v12 ^ v1) >> ROTATE_16) | ((v12 ^ v1).yx << ROTATE_16);
- v11 = v11 + v12 + vec2(0u, u32(v11.x + v12.x < v11.x));
- v6 = ((v6 ^ v11) << ROTATE_1) | ((v6 ^ v11).yx >> ROTATE_31);
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=10, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) << ROTATE_1) | ((v7 ^ v8).yx >> ROTATE_31);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=10, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=10, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = (v14 ^ v3).yx;
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) >> ROTATE_24) | ((v4 ^ v9).yx << ROTATE_8);
- v3 = v3 + v4 + vec2(0u, u32(v3.x + v4.x < v3.x));
- v14 = ((v14 ^ v3) >> ROTATE_16) | ((v14 ^ v3).yx << ROTATE_16);
- v9 = v9 + v14 + vec2(0u, u32(v9.x + v14.x < v9.x));
- v4 = ((v4 ^ v9) << ROTATE_1) | ((v4 ^ v9).yx >> ROTATE_31);
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ // x = VEC4_0;
+ // y = VEC4_0;
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ v67 = vec4(v67.xy, b.xy);
+ v45 = vec4(b.zw, v45.zw);
+ v1213 = vec4(v1213.xy, d.xy);
+ v1415 = vec4(d.zw, v1415.zw);
/**
* r=11, i=0, a=v[0], b=v[4], c=v[8], d=v[12]
- */
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = (v12 ^ v0).yx;
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- v4 = ((v4 ^ v8) >> ROTATE_24) | ((v4 ^ v8).yx << ROTATE_8);
- v0 = v0 + v4 + vec2(0u, u32(v0.x + v4.x < v0.x));
- v12 = ((v12 ^ v0) >> ROTATE_16) | ((v12 ^ v0).yx << ROTATE_16);
- v8 = v8 + v12 + vec2(0u, u32(v8.x + v12.x < v8.x));
- // skip since it does not affect the final values of `v0` and `v8`
-
- /**
* r=11, i=1, a=v[1], b=v[5], c=v[9], d=v[13]
*/
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v1 = v1 + m4 + vec2(0u, u32(v1.x + m4.x < v1.x));
- v13 = (v13 ^ v1).yx;
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) >> ROTATE_24) | ((v5 ^ v9).yx << ROTATE_8);
- v1 = v1 + v5 + vec2(0u, u32(v1.x + v5.x < v1.x));
- v13 = ((v13 ^ v1) >> ROTATE_16) | ((v13 ^ v1).yx << ROTATE_16);
- v9 = v9 + v13 + vec2(0u, u32(v9.x + v13.x < v9.x));
- v5 = ((v5 ^ v9) << ROTATE_1) | ((v5 ^ v9).yx >> ROTATE_31);
+ x = vec4(VEC2_0, m4);
+ // y = VEC4_0;
- /**
- * r=11, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
- */
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = (v14 ^ v2).yx;
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- v6 = ((v6 ^ v10) >> ROTATE_24) | ((v6 ^ v10).yx << ROTATE_8);
- v2 = v2 + v6 + vec2(0u, u32(v2.x + v6.x < v2.x));
- v14 = ((v14 ^ v2) >> ROTATE_16) | ((v14 ^ v2).yx << ROTATE_16);
- v10 = v10 + v14 + vec2(0u, u32(v10.x + v14.x < v10.x));
- // skip since it does not affect the final values of `v0` and `v8`
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ v1213 = (v1213 ^ v01).yxwz;
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) >> ROTATE_24) | ((v45 ^ v89).yxwz << ROTATE_8);
+ v01 += v45 + vec4(0u, u32(v01.x + v45.x < v01.x), 0u, u32(v01.z + v45.z < v01.z));
+ // v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ v1213 = ((v1213 ^ v01) >> ROTATE_16) | ((v1213 ^ v01).yxwz << ROTATE_16);
+ v89 += v1213 + vec4(0u, u32(v89.x + v1213.x < v89.x), 0u, u32(v89.z + v1213.z < v89.z));
+ v45 = ((v45 ^ v89) << ROTATE_1) | ((v45 ^ v89).yxwz >> ROTATE_31);
/**
+ * r=11, i=2, a=v[2], b=v[6], c=v[10], d=v[14]
* r=11, i=3, a=v[3], b=v[7], c=v[11], d=v[15]
*/
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = (v15 ^ v3).yx;
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) >> ROTATE_24) | ((v7 ^ v11).yx << ROTATE_8);
- v3 = v3 + v7 + vec2(0u, u32(v3.x + v7.x < v3.x));
- v15 = ((v15 ^ v3) >> ROTATE_16) | ((v15 ^ v3).yx << ROTATE_16);
- v11 = v11 + v15 + vec2(0u, u32(v11.x + v15.x < v11.x));
- v7 = ((v7 ^ v11) << ROTATE_1) | ((v7 ^ v11).yx >> ROTATE_31);
+ // x = VEC4_0;
+ // y = VEC4_0;
- /**
- * r=11, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
- */
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- v0 = v0 + m1 + vec2(0u, u32(v0.x + m1.x < v0.x));
- v15 = (v15 ^ v0).yx;
- v10 = v10 + v15 + vec2(0u, u32(v10.x + v15.x < v10.x));
- v5 = ((v5 ^ v10) >> ROTATE_24) | ((v5 ^ v10).yx << ROTATE_8);
- v0 = v0 + v5 + vec2(0u, u32(v0.x + v5.x < v0.x));
- // skip since it does not affect the final values of `v0` and `v8`
- // skip since it does not affect the final values of `v0` and `v8`
- // skip since it does not affect the final values of `v0` and `v8`
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ v1415 = (v1415 ^ v23).yxwz;
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) >> ROTATE_24) | ((v67 ^ v1011).yxwz << ROTATE_8);
+ v23 += v67 + vec4(0u, u32(v23.x + v67.x < v23.x), 0u, u32(v23.z + v67.z < v23.z));
+ // v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ v1415 = ((v1415 ^ v23) >> ROTATE_16) | ((v1415 ^ v23).yxwz << ROTATE_16);
+ v1011 += v1415 + vec4(0u, u32(v1011.x + v1415.x < v1011.x), 0u, u32(v1011.z + v1415.z < v1011.z));
+ v67 = ((v67 ^ v1011) << ROTATE_1) | ((v67 ^ v1011).yxwz >> ROTATE_31);
/**
+ * r=11, i=4, a=v[0], b=v[5], c=v[10], d=v[15]
* r=11, i=5, a=v[1], b=v[6], c=v[11], d=v[12]
*/
- // skip entire step since it does not affect the final values of `v0` and `v8`
+ b = vec4(v45.zw, v67.xy);
+ d = vec4(v1415.zw, v1213.xy);
+ x = vec4(m1, m0);
+ y = vec4(VEC2_0, m2);
- /**
- * r=11, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
- */
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = (v13 ^ v2).yx;
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- v7 = ((v7 ^ v8) >> ROTATE_24) | ((v7 ^ v8).yx << ROTATE_8);
- v2 = v2 + v7 + vec2(0u, u32(v2.x + v7.x < v2.x));
- v13 = ((v13 ^ v2) >> ROTATE_16) | ((v13 ^ v2).yx << ROTATE_16);
- v8 = v8 + v13 + vec2(0u, u32(v8.x + v13.x < v8.x));
- // skip since we already have the final values of `v0` and `v8`
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += x + vec4(0u, u32(v01.x + x.x < v01.x), 0u, u32(v01.z + x.z < v01.z));
+ d = (d ^ v01).yxwz;
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) >> ROTATE_24) | ((b ^ v1011).yxwz << ROTATE_8);
+ v01 += b + vec4(0u, u32(v01.x + b.x < v01.x), 0u, u32(v01.z + b.z < v01.z));
+ v01 += y + vec4(0u, u32(v01.x + y.x < v01.x), 0u, u32(v01.z + y.z < v01.z));
+ d = ((d ^ v01) >> ROTATE_16) | ((d ^ v01).yxwz << ROTATE_16);
+ v1011 += d + vec4(0u, u32(v1011.x + d.x < v1011.x), 0u, u32(v1011.z + d.z < v1011.z));
+ b = ((b ^ v1011) << ROTATE_1) | ((b ^ v1011).yxwz >> ROTATE_31);
+
+ v45 = vec4(v45.xy, b.xy);
+ v67 = vec4(b.zw, v67.zw);
+ v1415 = vec4(v1415.xy, d.xy);
+ v1213 = vec4(d.zw, v1213.zw);
/**
+ * r=11, i=6, a=v[2], b=v[7], c=v[8], d=v[13]
* r=11, i=7, a=v[3], b=v[4], c=v[9], d=v[14]
*/
- // skip entire step since we already have the final values of `v0` and `v8`
+ b = vec4(v67.zw, v45.xy);
+ d = vec4(v1213.zw, v1415.xy);
+ // x = VEC4_0;
+ y = vec4(VEC2_0, m3);
+
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ // v23 += x + vec4(0u, u32(v23.x + x.x < v23.x), 0u, u32(v23.z + x.z < v23.z));
+ d = (d ^ v23).yxwz;
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ b = ((b ^ v89) >> ROTATE_24) | ((b ^ v89).yxwz << ROTATE_8);
+ v23 += b + vec4(0u, u32(v23.x + b.x < v23.x), 0u, u32(v23.z + b.z < v23.z));
+ v23 += y + vec4(0u, u32(v23.x + y.x < v23.x), 0u, u32(v23.z + y.z < v23.z));
+ d = ((d ^ v23) >> ROTATE_16) | ((d ^ v23).yxwz << ROTATE_16);
+ v89 += d + vec4(0u, u32(v89.x + d.x < v89.x), 0u, u32(v89.z + d.z < v89.z));
+ // b = ((b ^ v89) << ROTATE_1) | ((b ^ v89).yxwz >> ROTATE_31);
+
+ // v67 = vec4(v67.xy, b.xy);
+ // v45 = vec4(b.zw, v45.zw);
+ // v1213 = vec4(v1213.xy, d.xy);
+ // v1415 = vec4(d.zw, v1415.zw);
/**
* Set nonce if it passes the threshold and no other thread has set it
*/
- if ((BLAKE2B_IV32_0.y ^ v0.y ^ v8.y) > threshold) {
+ if ((BLAKE2B_IV32_0.y ^ v01.y ^ v89.y) > ubo.threshold) {
let wasFound: u32 = atomicExchange(&work.found, 1u);
if (wasFound == 0u) {
work.nonce = m0;