43 #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SIMD 44 #define SPH_SMALL_FOOTPRINT_SIMD 1 48 #pragma warning (disable: 4146) 55 #define ROL32 SPH_ROTL32 57 #define XCAT(x, y) XCAT_(x, y) 58 #define XCAT_(x, y) x ## y 64 1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130,
65 190, 80, 196, 69, 2, 82, 21, 90, 92, 174, 195, 28,
66 120, 37, 232, 3, 123, 160, 135, 138, 4, 164, 42, 180,
67 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19,
68 8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12,
69 235, 126, 26, 38, 16, 142, 168, 206, 222, 107, 18, 224,
70 189, 39, 57, 24, 213, 252, 52, 76, 32, 27, 79, 155,
71 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152,
72 64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96,
73 81, 237, 208, 47, 128, 108, 59, 106, 234, 85, 144, 250,
74 227, 55, 199, 192, 162, 217, 159, 94, 256, 216, 118, 212,
75 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188,
76 255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254,
77 134, 97, 122, 119, 253, 93, 215, 77, 73, 166, 124, 201,
78 17, 183, 50, 251, 11, 194, 244, 238, 249, 186, 173, 154,
79 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219,
80 241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233,
81 44, 5, 205, 181, 225, 230, 178, 102, 70, 43, 221, 66,
82 136, 179, 143, 209, 88, 10, 153, 105, 193, 203, 99, 204,
83 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210,
84 129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65,
93 #define REDS1(x) (((x) & 0xFF) - ((x) >> 8)) 94 #define REDS2(x) (((x) & 0xFFFF) + ((x) >> 16)) 102 #define FFT_LOOP(rb, hk, as, id) do { \ 105 s32 n = q[(rb) + (hk)]; \ 107 q[(rb) + (hk)] = m - n; \ 110 for (; u < (hk); u += 4, v += 4 * (as)) { \ 112 m = q[(rb) + u + 0]; \ 113 n = q[(rb) + u + 0 + (hk)]; \ 114 t = REDS2(n * alpha_tab[v + 0 * (as)]); \ 115 q[(rb) + u + 0] = m + t; \ 116 q[(rb) + u + 0 + (hk)] = m - t; \ 118 m = q[(rb) + u + 1]; \ 119 n = q[(rb) + u + 1 + (hk)]; \ 120 t = REDS2(n * alpha_tab[v + 1 * (as)]); \ 121 q[(rb) + u + 1] = m + t; \ 122 q[(rb) + u + 1 + (hk)] = m - t; \ 123 m = q[(rb) + u + 2]; \ 124 n = q[(rb) + u + 2 + (hk)]; \ 125 t = REDS2(n * alpha_tab[v + 2 * (as)]); \ 126 q[(rb) + u + 2] = m + t; \ 127 q[(rb) + u + 2 + (hk)] = m - t; \ 128 m = q[(rb) + u + 3]; \ 129 n = q[(rb) + u + 3 + (hk)]; \ 130 t = REDS2(n * alpha_tab[v + 3 * (as)]); \ 131 q[(rb) + u + 3] = m + t; \ 132 q[(rb) + u + 3 + (hk)] = m - t; \ 147 #define FFT8(xb, xs, d) do { \ 149 s32 x1 = x[(xb) + (xs)]; \ 150 s32 x2 = x[(xb) + 2 * (xs)]; \ 151 s32 x3 = x[(xb) + 3 * (xs)]; \ 153 s32 a1 = x0 + (x2 << 4); \ 155 s32 a3 = x0 - (x2 << 4); \ 157 s32 b1 = REDS1((x1 << 2) + (x3 << 6)); \ 158 s32 b2 = (x1 << 4) - (x3 << 4); \ 159 s32 b3 = REDS1((x1 << 6) + (x3 << 2)); \ 176 #define FFT16(xb, xs, rb) do { \ 177 s32 d1_0, d1_1, d1_2, d1_3, d1_4, d1_5, d1_6, d1_7; \ 178 s32 d2_0, d2_1, d2_2, d2_3, d2_4, d2_5, d2_6, d2_7; \ 179 FFT8(xb, (xs) << 1, d1_); \ 180 FFT8((xb) + (xs), (xs) << 1, d2_); \ 181 q[(rb) + 0] = d1_0 + d2_0; \ 182 q[(rb) + 1] = d1_1 + (d2_1 << 1); \ 183 q[(rb) + 2] = d1_2 + (d2_2 << 2); \ 184 q[(rb) + 3] = d1_3 + (d2_3 << 3); \ 185 q[(rb) + 4] = d1_4 + (d2_4 << 4); \ 186 q[(rb) + 5] = d1_5 + (d2_5 << 5); \ 187 q[(rb) + 6] = d1_6 + (d2_6 << 6); \ 188 q[(rb) + 7] = d1_7 + (d2_7 << 7); \ 189 q[(rb) + 8] = d1_0 - d2_0; \ 190 q[(rb) + 9] = d1_1 - (d2_1 << 1); \ 191 q[(rb) + 10] = d1_2 - (d2_2 << 2); \ 192 q[(rb) + 11] = d1_3 - (d2_3 << 3); \ 193 q[(rb) + 12] = d1_4 - (d2_4 << 4); \ 194 q[(rb) + 13] = d1_5 - (d2_5 << 5); \ 195 q[(rb) + 14] = d1_6 - (d2_6 << 6); \ 196 q[(rb) + 15] = d1_7 - (d2_7 << 7); \ 202 #define FFT32(xb, xs, rb, id) do { \ 203 FFT16(xb, (xs) << 1, rb); \ 204 FFT16((xb) + (xs), (xs) << 1, (rb) + 16); \ 205 FFT_LOOP(rb, 16, 8, id); \ 211 #define FFT64(xb, xs, rb, id) do { \ 212 FFT32(xb, (xs) << 1, rb, XCAT(id, a)); \ 213 FFT32((xb) + (xs), (xs) << 1, (rb) + 32, XCAT(id, b)); \ 214 FFT_LOOP(rb, 32, 4, id); \ 217 #if SPH_SMALL_FOOTPRINT_SIMD 220 fft32(
unsigned char *x,
size_t xs,
s32 *q)
230 #define FFT128(xb, xs, rb, id) do { \ 231 fft32(x + (xb) + ((xs) * 0), (xs) << 2, &q[(rb) + 0]); \ 232 fft32(x + (xb) + ((xs) * 2), (xs) << 2, &q[(rb) + 32]); \ 233 FFT_LOOP(rb, 32, 4, XCAT(id, aa)); \ 234 fft32(x + (xb) + ((xs) * 1), (xs) << 2, &q[(rb) + 64]); \ 235 fft32(x + (xb) + ((xs) * 3), (xs) << 2, &q[(rb) + 96]); \ 236 FFT_LOOP((rb) + 64, 32, 4, XCAT(id, ab)); \ 237 FFT_LOOP(rb, 64, 2, XCAT(id, a)); \ 245 #define FFT128(xb, xs, rb, id) do { \ 246 FFT64(xb, (xs) << 1, rb, XCAT(id, a)); \ 247 FFT64((xb) + (xs), (xs) << 1, (rb) + 64, XCAT(id, b)); \ 248 FFT_LOOP(rb, 64, 2, id); \ 265 FFT32(0, xd, 0, label_a);
266 FFT32(xs, xd, 32, label_b);
273 #define FFT256(xb, xs, rb, id) do { \ 274 fft64(x + (xb) + ((xs) * 0), (xs) << 2, &q[(rb) + 0]); \ 275 fft64(x + (xb) + ((xs) * 2), (xs) << 2, &q[(rb) + 64]); \ 276 FFT_LOOP(rb, 64, 2, XCAT(id, aa)); \ 277 fft64(x + (xb) + ((xs) * 1), (xs) << 2, &q[(rb) + 128]); \ 278 fft64(x + (xb) + ((xs) * 3), (xs) << 2, &q[(rb) + 192]); \ 279 FFT_LOOP((rb) + 128, 64, 2, XCAT(id, ab)); \ 280 FFT_LOOP(rb, 128, 1, XCAT(id, a)); \ 287 1, 98, 95, 58, 30, 113, 23, 198, 129, 49, 176, 29,
288 15, 185, 140, 99, 193, 153, 88, 143, 136, 221, 70, 178,
289 225, 205, 44, 200, 68, 239, 35, 89, 241, 231, 22, 100,
290 34, 248, 146, 173, 249, 244, 11, 50, 17, 124, 73, 215,
291 253, 122, 134, 25, 137, 62, 165, 236, 255, 61, 67, 141,
292 197, 31, 211, 118, 256, 159, 162, 199, 227, 144, 234, 59,
293 128, 208, 81, 228, 242, 72, 117, 158, 64, 104, 169, 114,
294 121, 36, 187, 79, 32, 52, 213, 57, 189, 18, 222, 168,
295 16, 26, 235, 157, 223, 9, 111, 84, 8, 13, 246, 207,
296 240, 133, 184, 42, 4, 135, 123, 232, 120, 195, 92, 21,
297 2, 196, 190, 116, 60, 226, 46, 139
304 2, 156, 118, 107, 45, 212, 111, 162, 97, 249, 211, 3,
305 49, 101, 151, 223, 189, 178, 253, 204, 76, 82, 232, 65,
306 96, 176, 161, 47, 189, 61, 248, 107, 0, 131, 133, 113,
307 17, 33, 12, 111, 251, 103, 57, 148, 47, 65, 249, 143,
308 189, 8, 204, 230, 205, 151, 187, 227, 247, 111, 140, 6,
309 77, 10, 21, 149, 255, 101, 139, 150, 212, 45, 146, 95,
310 160, 8, 46, 254, 208, 156, 106, 34, 68, 79, 4, 53,
311 181, 175, 25, 192, 161, 81, 96, 210, 68, 196, 9, 150,
312 0, 126, 124, 144, 240, 224, 245, 146, 6, 154, 200, 109,
313 210, 192, 8, 114, 68, 249, 53, 27, 52, 106, 70, 30,
314 10, 146, 117, 251, 180, 247, 236, 108
321 1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172,
322 23, 151, 198, 149, 129, 210, 49, 20, 176, 161, 29, 101,
323 15, 132, 185, 86, 140, 204, 99, 203, 193, 105, 153, 10,
324 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230,
325 225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150,
326 35, 51, 89, 115, 241, 219, 231, 131, 22, 245, 100, 109,
327 34, 145, 248, 75, 146, 154, 173, 186, 249, 238, 244, 194,
328 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93,
329 253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83,
330 165, 167, 236, 175, 255, 188, 61, 177, 67, 127, 141, 110,
331 197, 243, 31, 170, 211, 212, 118, 216, 256, 94, 159, 217,
332 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108,
333 128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171,
334 117, 53, 158, 54, 64, 152, 104, 247, 169, 48, 114, 78,
335 121, 191, 36, 214, 187, 155, 79, 27, 32, 76, 52, 252,
336 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142,
337 16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182,
338 111, 103, 84, 71, 8, 19, 13, 63, 246, 6, 207, 74,
339 240, 56, 133, 91, 184, 180, 42, 164, 4, 138, 135, 160,
340 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82,
341 2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87,
349 2, 203, 156, 47, 118, 214, 107, 106, 45, 93, 212, 20,
350 111, 73, 162, 251, 97, 215, 249, 53, 211, 19, 3, 89,
351 49, 207, 101, 67, 151, 130, 223, 23, 189, 202, 178, 239,
352 253, 127, 204, 49, 76, 236, 82, 137, 232, 157, 65, 79,
353 96, 161, 176, 130, 161, 30, 47, 9, 189, 247, 61, 226,
354 248, 90, 107, 64, 0, 88, 131, 243, 133, 59, 113, 115,
355 17, 236, 33, 213, 12, 191, 111, 19, 251, 61, 103, 208,
356 57, 35, 148, 248, 47, 116, 65, 119, 249, 178, 143, 40,
357 189, 129, 8, 163, 204, 227, 230, 196, 205, 122, 151, 45,
358 187, 19, 227, 72, 247, 125, 111, 121, 140, 220, 6, 107,
359 77, 69, 10, 101, 21, 65, 149, 171, 255, 54, 101, 210,
360 139, 43, 150, 151, 212, 164, 45, 237, 146, 184, 95, 6,
361 160, 42, 8, 204, 46, 238, 254, 168, 208, 50, 156, 190,
362 106, 127, 34, 234, 68, 55, 79, 18, 4, 130, 53, 208,
363 181, 21, 175, 120, 25, 100, 192, 178, 161, 96, 81, 127,
364 96, 227, 210, 248, 68, 10, 196, 31, 9, 167, 150, 193,
365 0, 169, 126, 14, 124, 198, 144, 142, 240, 21, 224, 44,
366 245, 66, 146, 238, 6, 196, 154, 49, 200, 222, 109, 9,
367 210, 141, 192, 138, 8, 79, 114, 217, 68, 128, 249, 94,
368 53, 30, 27, 61, 52, 135, 106, 212, 70, 238, 30, 185,
369 10, 132, 146, 136, 117, 37, 251, 150, 180, 188, 247, 156,
373 #define INNER(l, h, mm) (((u32)((l) * (mm)) & 0xFFFFU) \ 374 + ((u32)((h) * (mm)) << 16)) 376 #define W_SMALL(sb, o1, o2, mm) \ 377 (INNER(q[8 * (sb) + 2 * 0 + o1], q[8 * (sb) + 2 * 0 + o2], mm), \ 378 INNER(q[8 * (sb) + 2 * 1 + o1], q[8 * (sb) + 2 * 1 + o2], mm), \ 379 INNER(q[8 * (sb) + 2 * 2 + o1], q[8 * (sb) + 2 * 2 + o2], mm), \ 380 INNER(q[8 * (sb) + 2 * 3 + o1], q[8 * (sb) + 2 * 3 + o2], mm) 382 #define WS_0_0 W_SMALL( 4, 0, 1, 185) 383 #define WS_0_1 W_SMALL( 6, 0, 1, 185) 384 #define WS_0_2 W_SMALL( 0, 0, 1, 185) 385 #define WS_0_3 W_SMALL( 2, 0, 1, 185) 386 #define WS_0_4 W_SMALL( 7, 0, 1, 185) 387 #define WS_0_5 W_SMALL( 5, 0, 1, 185) 388 #define WS_0_6 W_SMALL( 3, 0, 1, 185) 389 #define WS_0_7 W_SMALL( 1, 0, 1, 185) 390 #define WS_1_0 W_SMALL(15, 0, 1, 185) 391 #define WS_1_1 W_SMALL(11, 0, 1, 185) 392 #define WS_1_2 W_SMALL(12, 0, 1, 185) 393 #define WS_1_3 W_SMALL( 8, 0, 1, 185) 394 #define WS_1_4 W_SMALL( 9, 0, 1, 185) 395 #define WS_1_5 W_SMALL(13, 0, 1, 185) 396 #define WS_1_6 W_SMALL(10, 0, 1, 185) 397 #define WS_1_7 W_SMALL(14, 0, 1, 185) 398 #define WS_2_0 W_SMALL(17, -128, -64, 233) 399 #define WS_2_1 W_SMALL(18, -128, -64, 233) 400 #define WS_2_2 W_SMALL(23, -128, -64, 233) 401 #define WS_2_3 W_SMALL(20, -128, -64, 233) 402 #define WS_2_4 W_SMALL(22, -128, -64, 233) 403 #define WS_2_5 W_SMALL(21, -128, -64, 233) 404 #define WS_2_6 W_SMALL(16, -128, -64, 233) 405 #define WS_2_7 W_SMALL(19, -128, -64, 233) 406 #define WS_3_0 W_SMALL(30, -191, -127, 233) 407 #define WS_3_1 W_SMALL(24, -191, -127, 233) 408 #define WS_3_2 W_SMALL(25, -191, -127, 233) 409 #define WS_3_3 W_SMALL(31, -191, -127, 233) 410 #define WS_3_4 W_SMALL(27, -191, -127, 233) 411 #define WS_3_5 W_SMALL(29, -191, -127, 233) 412 #define WS_3_6 W_SMALL(28, -191, -127, 233) 413 #define WS_3_7 W_SMALL(26, -191, -127, 233) 415 #define W_BIG(sb, o1, o2, mm) \ 416 (INNER(q[16 * (sb) + 2 * 0 + o1], q[16 * (sb) + 2 * 0 + o2], mm), \ 417 INNER(q[16 * (sb) + 2 * 1 + o1], q[16 * (sb) + 2 * 1 + o2], mm), \ 418 INNER(q[16 * (sb) + 2 * 2 + o1], q[16 * (sb) + 2 * 2 + o2], mm), \ 419 INNER(q[16 * (sb) + 2 * 3 + o1], q[16 * (sb) + 2 * 3 + o2], mm), \ 420 INNER(q[16 * (sb) + 2 * 4 + o1], q[16 * (sb) + 2 * 4 + o2], mm), \ 421 INNER(q[16 * (sb) + 2 * 5 + o1], q[16 * (sb) + 2 * 5 + o2], mm), \ 422 INNER(q[16 * (sb) + 2 * 6 + o1], q[16 * (sb) + 2 * 6 + o2], mm), \ 423 INNER(q[16 * (sb) + 2 * 7 + o1], q[16 * (sb) + 2 * 7 + o2], mm) 425 #define WB_0_0 W_BIG( 4, 0, 1, 185) 426 #define WB_0_1 W_BIG( 6, 0, 1, 185) 427 #define WB_0_2 W_BIG( 0, 0, 1, 185) 428 #define WB_0_3 W_BIG( 2, 0, 1, 185) 429 #define WB_0_4 W_BIG( 7, 0, 1, 185) 430 #define WB_0_5 W_BIG( 5, 0, 1, 185) 431 #define WB_0_6 W_BIG( 3, 0, 1, 185) 432 #define WB_0_7 W_BIG( 1, 0, 1, 185) 433 #define WB_1_0 W_BIG(15, 0, 1, 185) 434 #define WB_1_1 W_BIG(11, 0, 1, 185) 435 #define WB_1_2 W_BIG(12, 0, 1, 185) 436 #define WB_1_3 W_BIG( 8, 0, 1, 185) 437 #define WB_1_4 W_BIG( 9, 0, 1, 185) 438 #define WB_1_5 W_BIG(13, 0, 1, 185) 439 #define WB_1_6 W_BIG(10, 0, 1, 185) 440 #define WB_1_7 W_BIG(14, 0, 1, 185) 441 #define WB_2_0 W_BIG(17, -256, -128, 233) 442 #define WB_2_1 W_BIG(18, -256, -128, 233) 443 #define WB_2_2 W_BIG(23, -256, -128, 233) 444 #define WB_2_3 W_BIG(20, -256, -128, 233) 445 #define WB_2_4 W_BIG(22, -256, -128, 233) 446 #define WB_2_5 W_BIG(21, -256, -128, 233) 447 #define WB_2_6 W_BIG(16, -256, -128, 233) 448 #define WB_2_7 W_BIG(19, -256, -128, 233) 449 #define WB_3_0 W_BIG(30, -383, -255, 233) 450 #define WB_3_1 W_BIG(24, -383, -255, 233) 451 #define WB_3_2 W_BIG(25, -383, -255, 233) 452 #define WB_3_3 W_BIG(31, -383, -255, 233) 453 #define WB_3_4 W_BIG(27, -383, -255, 233) 454 #define WB_3_5 W_BIG(29, -383, -255, 233) 455 #define WB_3_6 W_BIG(28, -383, -255, 233) 456 #define WB_3_7 W_BIG(26, -383, -255, 233) 458 #define IF(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) 459 #define MAJ(x, y, z) (((x) & (y)) | (((x) | (y)) & (z))) 539 #define DECL_STATE_SMALL 540 #define READ_STATE_SMALL(sc) 541 #define WRITE_STATE_SMALL(sc) 542 #define DECL_STATE_BIG 543 #define READ_STATE_BIG(sc) 544 #define WRITE_STATE_BIG(sc) 548 #define DECL_STATE_SMALL \ 549 u32 A0, A1, A2, A3, B0, B1, B2, B3, C0, C1, C2, C3, D0, D1, D2, D3; 551 #define READ_STATE_SMALL(sc) do { \ 552 A0 = (sc)->state[ 0]; \ 553 A1 = (sc)->state[ 1]; \ 554 A2 = (sc)->state[ 2]; \ 555 A3 = (sc)->state[ 3]; \ 556 B0 = (sc)->state[ 4]; \ 557 B1 = (sc)->state[ 5]; \ 558 B2 = (sc)->state[ 6]; \ 559 B3 = (sc)->state[ 7]; \ 560 C0 = (sc)->state[ 8]; \ 561 C1 = (sc)->state[ 9]; \ 562 C2 = (sc)->state[10]; \ 563 C3 = (sc)->state[11]; \ 564 D0 = (sc)->state[12]; \ 565 D1 = (sc)->state[13]; \ 566 D2 = (sc)->state[14]; \ 567 D3 = (sc)->state[15]; \ 570 #define WRITE_STATE_SMALL(sc) do { \ 571 (sc)->state[ 0] = A0; \ 572 (sc)->state[ 1] = A1; \ 573 (sc)->state[ 2] = A2; \ 574 (sc)->state[ 3] = A3; \ 575 (sc)->state[ 4] = B0; \ 576 (sc)->state[ 5] = B1; \ 577 (sc)->state[ 6] = B2; \ 578 (sc)->state[ 7] = B3; \ 579 (sc)->state[ 8] = C0; \ 580 (sc)->state[ 9] = C1; \ 581 (sc)->state[10] = C2; \ 582 (sc)->state[11] = C3; \ 583 (sc)->state[12] = D0; \ 584 (sc)->state[13] = D1; \ 585 (sc)->state[14] = D2; \ 586 (sc)->state[15] = D3; \ 589 #define DECL_STATE_BIG \ 590 u32 A0, A1, A2, A3, A4, A5, A6, A7; \ 591 u32 B0, B1, B2, B3, B4, B5, B6, B7; \ 592 u32 C0, C1, C2, C3, C4, C5, C6, C7; \ 593 u32 D0, D1, D2, D3, D4, D5, D6, D7; 595 #define READ_STATE_BIG(sc) do { \ 596 A0 = (sc)->state[ 0]; \ 597 A1 = (sc)->state[ 1]; \ 598 A2 = (sc)->state[ 2]; \ 599 A3 = (sc)->state[ 3]; \ 600 A4 = (sc)->state[ 4]; \ 601 A5 = (sc)->state[ 5]; \ 602 A6 = (sc)->state[ 6]; \ 603 A7 = (sc)->state[ 7]; \ 604 B0 = (sc)->state[ 8]; \ 605 B1 = (sc)->state[ 9]; \ 606 B2 = (sc)->state[10]; \ 607 B3 = (sc)->state[11]; \ 608 B4 = (sc)->state[12]; \ 609 B5 = (sc)->state[13]; \ 610 B6 = (sc)->state[14]; \ 611 B7 = (sc)->state[15]; \ 612 C0 = (sc)->state[16]; \ 613 C1 = (sc)->state[17]; \ 614 C2 = (sc)->state[18]; \ 615 C3 = (sc)->state[19]; \ 616 C4 = (sc)->state[20]; \ 617 C5 = (sc)->state[21]; \ 618 C6 = (sc)->state[22]; \ 619 C7 = (sc)->state[23]; \ 620 D0 = (sc)->state[24]; \ 621 D1 = (sc)->state[25]; \ 622 D2 = (sc)->state[26]; \ 623 D3 = (sc)->state[27]; \ 624 D4 = (sc)->state[28]; \ 625 D5 = (sc)->state[29]; \ 626 D6 = (sc)->state[30]; \ 627 D7 = (sc)->state[31]; \ 630 #define WRITE_STATE_BIG(sc) do { \ 631 (sc)->state[ 0] = A0; \ 632 (sc)->state[ 1] = A1; \ 633 (sc)->state[ 2] = A2; \ 634 (sc)->state[ 3] = A3; \ 635 (sc)->state[ 4] = A4; \ 636 (sc)->state[ 5] = A5; \ 637 (sc)->state[ 6] = A6; \ 638 (sc)->state[ 7] = A7; \ 639 (sc)->state[ 8] = B0; \ 640 (sc)->state[ 9] = B1; \ 641 (sc)->state[10] = B2; \ 642 (sc)->state[11] = B3; \ 643 (sc)->state[12] = B4; \ 644 (sc)->state[13] = B5; \ 645 (sc)->state[14] = B6; \ 646 (sc)->state[15] = B7; \ 647 (sc)->state[16] = C0; \ 648 (sc)->state[17] = C1; \ 649 (sc)->state[18] = C2; \ 650 (sc)->state[19] = C3; \ 651 (sc)->state[20] = C4; \ 652 (sc)->state[21] = C5; \ 653 (sc)->state[22] = C6; \ 654 (sc)->state[23] = C7; \ 655 (sc)->state[24] = D0; \ 656 (sc)->state[25] = D1; \ 657 (sc)->state[26] = D2; \ 658 (sc)->state[27] = D3; \ 659 (sc)->state[28] = D4; \ 660 (sc)->state[29] = D5; \ 661 (sc)->state[30] = D6; \ 662 (sc)->state[31] = D7; \ 667 #define STEP_ELT(n, w, fun, s, ppb) do { \ 668 u32 tt = T32(D ## n + (w) + fun(A ## n, B ## n, C ## n)); \ 669 A ## n = T32(ROL32(tt, s) + XCAT(tA, XCAT(ppb, n))); \ 675 #define STEP_SMALL(w0, w1, w2, w3, fun, r, s, pp4b) do { \ 676 u32 tA0 = ROL32(A0, r); \ 677 u32 tA1 = ROL32(A1, r); \ 678 u32 tA2 = ROL32(A2, r); \ 679 u32 tA3 = ROL32(A3, r); \ 680 STEP_ELT(0, w0, fun, s, pp4b); \ 681 STEP_ELT(1, w1, fun, s, pp4b); \ 682 STEP_ELT(2, w2, fun, s, pp4b); \ 683 STEP_ELT(3, w3, fun, s, pp4b); \ 686 #define STEP_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b) do { \ 687 u32 tA0 = ROL32(A0, r); \ 688 u32 tA1 = ROL32(A1, r); \ 689 u32 tA2 = ROL32(A2, r); \ 690 u32 tA3 = ROL32(A3, r); \ 691 u32 tA4 = ROL32(A4, r); \ 692 u32 tA5 = ROL32(A5, r); \ 693 u32 tA6 = ROL32(A6, r); \ 694 u32 tA7 = ROL32(A7, r); \ 695 STEP_ELT(0, w0, fun, s, pp8b); \ 696 STEP_ELT(1, w1, fun, s, pp8b); \ 697 STEP_ELT(2, w2, fun, s, pp8b); \ 698 STEP_ELT(3, w3, fun, s, pp8b); \ 699 STEP_ELT(4, w4, fun, s, pp8b); \ 700 STEP_ELT(5, w5, fun, s, pp8b); \ 701 STEP_ELT(6, w6, fun, s, pp8b); \ 702 STEP_ELT(7, w7, fun, s, pp8b); \ 732 #define STEP_SMALL_(w, fun, r, s, pp4b) STEP_SMALL w, fun, r, s, pp4b) 734 #define ONE_ROUND_SMALL(ri, isp, p0, p1, p2, p3) do { \ 735 STEP_SMALL_(WS_ ## ri ## 0, \ 736 IF, p0, p1, XCAT(PP4_, M3_0_ ## isp)); \ 737 STEP_SMALL_(WS_ ## ri ## 1, \ 738 IF, p1, p2, XCAT(PP4_, M3_1_ ## isp)); \ 739 STEP_SMALL_(WS_ ## ri ## 2, \ 740 IF, p2, p3, XCAT(PP4_, M3_2_ ## isp)); \ 741 STEP_SMALL_(WS_ ## ri ## 3, \ 742 IF, p3, p0, XCAT(PP4_, M3_3_ ## isp)); \ 743 STEP_SMALL_(WS_ ## ri ## 4, \ 744 MAJ, p0, p1, XCAT(PP4_, M3_4_ ## isp)); \ 745 STEP_SMALL_(WS_ ## ri ## 5, \ 746 MAJ, p1, p2, XCAT(PP4_, M3_5_ ## isp)); \ 747 STEP_SMALL_(WS_ ## ri ## 6, \ 748 MAJ, p2, p3, XCAT(PP4_, M3_6_ ## isp)); \ 749 STEP_SMALL_(WS_ ## ri ## 7, \ 750 MAJ, p3, p0, XCAT(PP4_, M3_7_ ## isp)); \ 789 #define STEP_BIG_(w, fun, r, s, pp8b) STEP_BIG w, fun, r, s, pp8b) 791 #define ONE_ROUND_BIG(ri, isp, p0, p1, p2, p3) do { \ 792 STEP_BIG_(WB_ ## ri ## 0, \ 793 IF, p0, p1, XCAT(PP8_, M7_0_ ## isp)); \ 794 STEP_BIG_(WB_ ## ri ## 1, \ 795 IF, p1, p2, XCAT(PP8_, M7_1_ ## isp)); \ 796 STEP_BIG_(WB_ ## ri ## 2, \ 797 IF, p2, p3, XCAT(PP8_, M7_2_ ## isp)); \ 798 STEP_BIG_(WB_ ## ri ## 3, \ 799 IF, p3, p0, XCAT(PP8_, M7_3_ ## isp)); \ 800 STEP_BIG_(WB_ ## ri ## 4, \ 801 MAJ, p0, p1, XCAT(PP8_, M7_4_ ## isp)); \ 802 STEP_BIG_(WB_ ## ri ## 5, \ 803 MAJ, p1, p2, XCAT(PP8_, M7_5_ ## isp)); \ 804 STEP_BIG_(WB_ ## ri ## 6, \ 805 MAJ, p2, p3, XCAT(PP8_, M7_6_ ## isp)); \ 806 STEP_BIG_(WB_ ## ri ## 7, \ 807 MAJ, p3, p0, XCAT(PP8_, M7_7_ ## isp)); \ 810 #if SPH_SMALL_FOOTPRINT_SIMD 829 #define STEP2_ELT(n, w, fun, s, ppb) do { \ 830 u32 tt = T32(D ## n + (w) + fun(A ## n, B ## n, C ## n)); \ 831 A ## n = T32(ROL32(tt, s) + tA[(ppb) ^ n]); \ 837 #define STEP2_SMALL(w0, w1, w2, w3, fun, r, s, pp4b) do { \ 839 tA[0] = ROL32(A0, r); \ 840 tA[1] = ROL32(A1, r); \ 841 tA[2] = ROL32(A2, r); \ 842 tA[3] = ROL32(A3, r); \ 843 STEP2_ELT(0, w0, fun, s, pp4b); \ 844 STEP2_ELT(1, w1, fun, s, pp4b); \ 845 STEP2_ELT(2, w2, fun, s, pp4b); \ 846 STEP2_ELT(3, w3, fun, s, pp4b); \ 850 one_round_small(
u32 *state,
u32 *w,
int isp,
int p0,
int p1,
int p2,
int p3)
852 static const int pp4k[] = { 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2 };
854 STEP2_SMALL(w[ 0], w[ 1], w[ 2], w[ 3],
IF, p0, p1, pp4k[isp + 0]);
855 STEP2_SMALL(w[ 4], w[ 5], w[ 6], w[ 7],
IF, p1, p2, pp4k[isp + 1]);
856 STEP2_SMALL(w[ 8], w[ 9], w[10], w[11],
IF, p2, p3, pp4k[isp + 2]);
857 STEP2_SMALL(w[12], w[13], w[14], w[15],
IF, p3, p0, pp4k[isp + 3]);
858 STEP2_SMALL(w[16], w[17], w[18], w[19],
MAJ, p0, p1, pp4k[isp + 4]);
859 STEP2_SMALL(w[20], w[21], w[22], w[23],
MAJ, p1, p2, pp4k[isp + 5]);
860 STEP2_SMALL(w[24], w[25], w[26], w[27],
MAJ, p2, p3, pp4k[isp + 6]);
861 STEP2_SMALL(w[28], w[29], w[30], w[31],
MAJ, p3, p0, pp4k[isp + 7]);
874 static const size_t wsp[32] = {
875 4 << 3, 6 << 3, 0 << 3, 2 << 3,
876 7 << 3, 5 << 3, 3 << 3, 1 << 3,
877 15 << 3, 11 << 3, 12 << 3, 8 << 3,
878 9 << 3, 13 << 3, 10 << 3, 14 << 3,
879 17 << 3, 18 << 3, 23 << 3, 20 << 3,
880 22 << 3, 21 << 3, 16 << 3, 19 << 3,
881 30 << 3, 24 << 3, 25 << 3, 31 << 3,
882 27 << 3, 29 << 3, 28 << 3, 26 << 3
888 for (i = 0; i < 128; i ++) {
895 q[i] = (tq <= 128 ? tq : tq - 257);
898 for (i = 0; i < 128; i ++) {
905 q[i] = (tq <= 128 ? tq : tq - 257);
909 for (i = 0; i < 16; i += 4) {
910 state[i + 0] = sc->
state[i + 0]
912 state[i + 1] = sc->
state[i + 1]
914 state[i + 2] = sc->
state[i + 2]
916 state[i + 3] = sc->
state[i + 3]
920 #define WSREAD(sb, o1, o2, mm) do { \ 921 for (u = 0; u < 32; u += 4) { \ 922 size_t v = wsp[(u >> 2) + (sb)]; \ 923 w[u + 0] = INNER(q[v + 2 * 0 + (o1)], \ 924 q[v + 2 * 0 + (o2)], mm); \ 925 w[u + 1] = INNER(q[v + 2 * 1 + (o1)], \ 926 q[v + 2 * 1 + (o2)], mm); \ 927 w[u + 2] = INNER(q[v + 2 * 2 + (o1)], \ 928 q[v + 2 * 2 + (o2)], mm); \ 929 w[u + 3] = INNER(q[v + 2 * 3 + (o1)], \ 930 q[v + 2 * 3 + (o2)], mm); \ 934 WSREAD( 0, 0, 1, 185);
935 one_round_small(state, w, 0, 3, 23, 17, 27);
936 WSREAD( 8, 0, 1, 185);
937 one_round_small(state, w, 2, 28, 19, 22, 7);
938 WSREAD(16, -128, -64, 233);
939 one_round_small(state, w, 1, 29, 9, 15, 5);
940 WSREAD(24, -191, -127, 233);
941 one_round_small(state, w, 0, 4, 13, 10, 25);
977 #define A0 (sc->state[ 0]) 978 #define A1 (sc->state[ 1]) 979 #define A2 (sc->state[ 2]) 980 #define A3 (sc->state[ 3]) 981 #define B0 (sc->state[ 4]) 982 #define B1 (sc->state[ 5]) 983 #define B2 (sc->state[ 6]) 984 #define B3 (sc->state[ 7]) 985 #define C0 (sc->state[ 8]) 986 #define C1 (sc->state[ 9]) 987 #define C2 (sc->state[10]) 988 #define C3 (sc->state[11]) 989 #define D0 (sc->state[12]) 990 #define D1 (sc->state[13]) 991 #define D2 (sc->state[14]) 992 #define D3 (sc->state[15]) 1012 for (i = 0; i < 128; i ++) {
1019 q[i] = (tq <= 128 ? tq : tq - 257);
1022 for (i = 0; i < 128; i ++) {
1029 q[i] = (tq <= 128 ? tq : tq - 257);
1054 STEP_SMALL(saved[ 0], saved[ 1], saved[ 2], saved[ 3],
1056 STEP_SMALL(saved[ 4], saved[ 5], saved[ 6], saved[ 7],
1057 IF, 13, 10, PP4_0_);
1058 STEP_SMALL(saved[ 8], saved[ 9], saved[10], saved[11],
1059 IF, 10, 25, PP4_1_);
1060 STEP_SMALL(saved[12], saved[13], saved[14], saved[15],
1066 IF, 13, 10, PP4_0_);
1068 IF, 10, 25, PP4_1_);
1096 #if SPH_SMALL_FOOTPRINT_SIMD 1098 #define A0 state[ 0] 1099 #define A1 state[ 1] 1100 #define A2 state[ 2] 1101 #define A3 state[ 3] 1102 #define A4 state[ 4] 1103 #define A5 state[ 5] 1104 #define A6 state[ 6] 1105 #define A7 state[ 7] 1106 #define B0 state[ 8] 1107 #define B1 state[ 9] 1108 #define B2 state[10] 1109 #define B3 state[11] 1110 #define B4 state[12] 1111 #define B5 state[13] 1112 #define B6 state[14] 1113 #define B7 state[15] 1114 #define C0 state[16] 1115 #define C1 state[17] 1116 #define C2 state[18] 1117 #define C3 state[19] 1118 #define C4 state[20] 1119 #define C5 state[21] 1120 #define C6 state[22] 1121 #define C7 state[23] 1122 #define D0 state[24] 1123 #define D1 state[25] 1124 #define D2 state[26] 1125 #define D3 state[27] 1126 #define D4 state[28] 1127 #define D5 state[29] 1128 #define D6 state[30] 1129 #define D7 state[31] 1143 #define STEP2_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b) do { \ 1145 tA[0] = ROL32(A0, r); \ 1146 tA[1] = ROL32(A1, r); \ 1147 tA[2] = ROL32(A2, r); \ 1148 tA[3] = ROL32(A3, r); \ 1149 tA[4] = ROL32(A4, r); \ 1150 tA[5] = ROL32(A5, r); \ 1151 tA[6] = ROL32(A6, r); \ 1152 tA[7] = ROL32(A7, r); \ 1153 STEP2_ELT(0, w0, fun, s, pp8b); \ 1154 STEP2_ELT(1, w1, fun, s, pp8b); \ 1155 STEP2_ELT(2, w2, fun, s, pp8b); \ 1156 STEP2_ELT(3, w3, fun, s, pp8b); \ 1157 STEP2_ELT(4, w4, fun, s, pp8b); \ 1158 STEP2_ELT(5, w5, fun, s, pp8b); \ 1159 STEP2_ELT(6, w6, fun, s, pp8b); \ 1160 STEP2_ELT(7, w7, fun, s, pp8b); \ 1164 one_round_big(
u32 *state,
u32 *w,
int isp,
int p0,
int p1,
int p2,
int p3)
1166 static const int pp8k[] = { 1, 6, 2, 3, 5, 7, 4, 1, 6, 2, 3 };
1168 STEP2_BIG(w[ 0], w[ 1], w[ 2], w[ 3], w[ 4], w[ 5], w[ 6], w[ 7],
1169 IF, p0, p1, pp8k[isp + 0]);
1170 STEP2_BIG(w[ 8], w[ 9], w[10], w[11], w[12], w[13], w[14], w[15],
1171 IF, p1, p2, pp8k[isp + 1]);
1172 STEP2_BIG(w[16], w[17], w[18], w[19], w[20], w[21], w[22], w[23],
1173 IF, p2, p3, pp8k[isp + 2]);
1174 STEP2_BIG(w[24], w[25], w[26], w[27], w[28], w[29], w[30], w[31],
1175 IF, p3, p0, pp8k[isp + 3]);
1176 STEP2_BIG(w[32], w[33], w[34], w[35], w[36], w[37], w[38], w[39],
1177 MAJ, p0, p1, pp8k[isp + 4]);
1178 STEP2_BIG(w[40], w[41], w[42], w[43], w[44], w[45], w[46], w[47],
1179 MAJ, p1, p2, pp8k[isp + 5]);
1180 STEP2_BIG(w[48], w[49], w[50], w[51], w[52], w[53], w[54], w[55],
1181 MAJ, p2, p3, pp8k[isp + 6]);
1182 STEP2_BIG(w[56], w[57], w[58], w[59], w[60], w[61], w[62], w[63],
1183 MAJ, p3, p0, pp8k[isp + 7]);
1196 static const size_t wbp[32] = {
1197 4 << 4, 6 << 4, 0 << 4, 2 << 4,
1198 7 << 4, 5 << 4, 3 << 4, 1 << 4,
1199 15 << 4, 11 << 4, 12 << 4, 8 << 4,
1200 9 << 4, 13 << 4, 10 << 4, 14 << 4,
1201 17 << 4, 18 << 4, 23 << 4, 20 << 4,
1202 22 << 4, 21 << 4, 16 << 4, 19 << 4,
1203 30 << 4, 24 << 4, 25 << 4, 31 << 4,
1204 27 << 4, 29 << 4, 28 << 4, 26 << 4
1210 for (i = 0; i < 256; i ++) {
1217 q[i] = (tq <= 128 ? tq : tq - 257);
1220 for (i = 0; i < 256; i ++) {
1227 q[i] = (tq <= 128 ? tq : tq - 257);
1231 for (i = 0; i < 32; i += 8) {
1232 state[i + 0] = sc->
state[i + 0]
1234 state[i + 1] = sc->
state[i + 1]
1236 state[i + 2] = sc->
state[i + 2]
1238 state[i + 3] = sc->
state[i + 3]
1240 state[i + 4] = sc->
state[i + 4]
1242 state[i + 5] = sc->
state[i + 5]
1244 state[i + 6] = sc->
state[i + 6]
1246 state[i + 7] = sc->
state[i + 7]
1250 #define WBREAD(sb, o1, o2, mm) do { \ 1251 for (u = 0; u < 64; u += 8) { \ 1252 size_t v = wbp[(u >> 3) + (sb)]; \ 1253 w[u + 0] = INNER(q[v + 2 * 0 + (o1)], \ 1254 q[v + 2 * 0 + (o2)], mm); \ 1255 w[u + 1] = INNER(q[v + 2 * 1 + (o1)], \ 1256 q[v + 2 * 1 + (o2)], mm); \ 1257 w[u + 2] = INNER(q[v + 2 * 2 + (o1)], \ 1258 q[v + 2 * 2 + (o2)], mm); \ 1259 w[u + 3] = INNER(q[v + 2 * 3 + (o1)], \ 1260 q[v + 2 * 3 + (o2)], mm); \ 1261 w[u + 4] = INNER(q[v + 2 * 4 + (o1)], \ 1262 q[v + 2 * 4 + (o2)], mm); \ 1263 w[u + 5] = INNER(q[v + 2 * 5 + (o1)], \ 1264 q[v + 2 * 5 + (o2)], mm); \ 1265 w[u + 6] = INNER(q[v + 2 * 6 + (o1)], \ 1266 q[v + 2 * 6 + (o2)], mm); \ 1267 w[u + 7] = INNER(q[v + 2 * 7 + (o1)], \ 1268 q[v + 2 * 7 + (o2)], mm); \ 1272 WBREAD( 0, 0, 1, 185);
1273 one_round_big(state, w, 0, 3, 23, 17, 27);
1274 WBREAD( 8, 0, 1, 185);
1275 one_round_big(state, w, 1, 28, 19, 22, 7);
1276 WBREAD(16, -256, -128, 233);
1277 one_round_big(state, w, 2, 29, 9, 15, 5);
1278 WBREAD(24, -383, -255, 233);
1279 one_round_big(state, w, 3, 4, 13, 10, 25);
1290 IF, 13, 10, PP8_5_);
1294 IF, 10, 25, PP8_6_);
1339 #define A0 (sc->state[ 0]) 1340 #define A1 (sc->state[ 1]) 1341 #define A2 (sc->state[ 2]) 1342 #define A3 (sc->state[ 3]) 1343 #define A4 (sc->state[ 4]) 1344 #define A5 (sc->state[ 5]) 1345 #define A6 (sc->state[ 6]) 1346 #define A7 (sc->state[ 7]) 1347 #define B0 (sc->state[ 8]) 1348 #define B1 (sc->state[ 9]) 1349 #define B2 (sc->state[10]) 1350 #define B3 (sc->state[11]) 1351 #define B4 (sc->state[12]) 1352 #define B5 (sc->state[13]) 1353 #define B6 (sc->state[14]) 1354 #define B7 (sc->state[15]) 1355 #define C0 (sc->state[16]) 1356 #define C1 (sc->state[17]) 1357 #define C2 (sc->state[18]) 1358 #define C3 (sc->state[19]) 1359 #define C4 (sc->state[20]) 1360 #define C5 (sc->state[21]) 1361 #define C6 (sc->state[22]) 1362 #define C7 (sc->state[23]) 1363 #define D0 (sc->state[24]) 1364 #define D1 (sc->state[25]) 1365 #define D2 (sc->state[26]) 1366 #define D3 (sc->state[27]) 1367 #define D4 (sc->state[28]) 1368 #define D5 (sc->state[29]) 1369 #define D6 (sc->state[30]) 1370 #define D7 (sc->state[31]) 1391 for (i = 0; i < 256; i ++) {
1398 q[i] = (tq <= 128 ? tq : tq - 257);
1401 for (i = 0; i < 256; i ++) {
1408 q[i] = (tq <= 128 ? tq : tq - 257);
1451 saved[ 0], saved[ 1], saved[ 2], saved[ 3],
1452 saved[ 4], saved[ 5], saved[ 6], saved[ 7],
1455 saved[ 8], saved[ 9], saved[10], saved[11],
1456 saved[12], saved[13], saved[14], saved[15],
1457 IF, 13, 10, PP8_5_);
1459 saved[16], saved[17], saved[18], saved[19],
1460 saved[20], saved[21], saved[22], saved[23],
1461 IF, 10, 25, PP8_6_);
1463 saved[24], saved[25], saved[26], saved[27],
1464 saved[28], saved[29], saved[30], saved[31],
1474 IF, 13, 10, PP8_5_);
1478 IF, 10, 25, PP8_6_);
1525 C32(0x33586E9F),
C32(0x12FFF033),
C32(0xB2D9F64D),
C32(0x6F8FEA53),
1526 C32(0xDE943106),
C32(0x2742E439),
C32(0x4FBAB5AC),
C32(0x62B9FF96),
1527 C32(0x22E7B0AF),
C32(0xC862B3A8),
C32(0x33E00CDC),
C32(0x236B86A6),
1528 C32(0xF64AE77C),
C32(0xFA373B76),
C32(0x7DC1EE5B),
C32(0x7FB29CE8)
1532 C32(0x4D567983),
C32(0x07190BA9),
C32(0x8474577B),
C32(0x39D726E9),
1533 C32(0xAAF3D925),
C32(0x3EE20B03),
C32(0xAFD5E751),
C32(0xC96006D3),
1534 C32(0xC2C2BA14),
C32(0x49B3BCB4),
C32(0xF67CAF46),
C32(0x668626C9),
1535 C32(0xE2EAA8D2),
C32(0x1FF47833),
C32(0xD0C661A5),
C32(0x55693DE1)
1539 C32(0x8A36EEBC),
C32(0x94A3BD90),
C32(0xD1537B83),
C32(0xB25B070B),
1540 C32(0xF463F1B5),
C32(0xB6F81E20),
C32(0x0055C339),
C32(0xB4D144D1),
1541 C32(0x7360CA61),
C32(0x18361A03),
C32(0x17DCB4B9),
C32(0x3414C45A),
1542 C32(0xA699A9D2),
C32(0xE39E9664),
C32(0x468BFE77),
C32(0x51D062F8),
1543 C32(0xB9E3BFE8),
C32(0x63BECE2A),
C32(0x8FE506B9),
C32(0xF8CC4AC2),
1544 C32(0x7AE11542),
C32(0xB1AADDA1),
C32(0x64B06794),
C32(0x28D2F462),
1545 C32(0xE64071EC),
C32(0x1DEB91A8),
C32(0x8AC8DB23),
C32(0x3F782AB5),
1546 C32(0x039B5CB8),
C32(0x71DDD962),
C32(0xFADE2CEA),
C32(0x1416DF71)
1550 C32(0x0BA16B95),
C32(0x72F999AD),
C32(0x9FECC2AE),
C32(0xBA3264FC),
1551 C32(0x5E894929),
C32(0x8E9F30E5),
C32(0x2F1DAA37),
C32(0xF0F2C558),
1552 C32(0xAC506643),
C32(0xA90635A5),
C32(0xE25B878B),
C32(0xAAB7878F),
1553 C32(0x88817F7A),
C32(0x0A02892B),
C32(0x559A7550),
C32(0x598F657E),
1554 C32(0x7EEF60A1),
C32(0x6B70E3E8),
C32(0x9C1714D1),
C32(0xB958E2A8),
1555 C32(0xAB02675E),
C32(0xED1C014F),
C32(0xCD8D65BB),
C32(0xFDB7A257),
1556 C32(0x09254899),
C32(0xD699C7BC),
C32(0x9019B6DC),
C32(0x2B9022E4),
1557 C32(0x8FA14956),
C32(0x21BF9BD3),
C32(0xB94D0943),
C32(0x6FFDDC22)
1591 clen = (
sizeof sc->
buf) - sc->
ptr;
1595 data = (
const unsigned char *)data + clen;
1597 if ((sc->
ptr += clen) ==
sizeof sc->
buf) {
1616 clen = (
sizeof sc->
buf) - sc->
ptr;
1620 data = (
const unsigned char *)data + clen;
1622 if ((sc->
ptr += clen) ==
sizeof sc->
buf) {
1638 low += (ptr << 3) + n;
1649 low += (ptr << 3) + n;
1662 if (sc->
ptr > 0 || n > 0) {
1663 memset(sc->
buf + sc->
ptr, 0,
1664 (
sizeof sc->
buf) - sc->
ptr);
1665 sc->
buf[sc->
ptr] = ub & (0xFF << (8 - n));
1668 memset(sc->
buf, 0,
sizeof sc->
buf);
1672 for (d = dst, u = 0; u < dst_len; u ++)
1684 if (sc->
ptr > 0 || n > 0) {
1685 memset(sc->
buf + sc->
ptr, 0,
1686 (
sizeof sc->
buf) - sc->
ptr);
1687 sc->
buf[sc->
ptr] = ub & (0xFF << (8 - n));
1690 memset(sc->
buf, 0,
sizeof sc->
buf);
1694 for (d = dst, u = 0; u < dst_len; u ++)
void sph_simd224(void *cc, const void *data, size_t len)
Process some data bytes.
#define FFT32(xb, xs, rb, id)
#define READ_STATE_BIG(sc)
static void update_big(void *cc, const void *data, size_t len)
#define FFT_LOOP(rb, hk, as, id)
void sph_simd384_close(void *cc, void *dst)
Terminate the current SIMD-384 computation and output the result into the provided buffer...
This structure is a context for SIMD computations: it contains the intermediate values and some data ...
static const unsigned short yoff_s_n[]
void sph_simd384_init(void *cc)
Initialize an SIMD-384 context.
void sph_simd512(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_simd512_close(void *cc, void *dst)
Terminate the current SIMD-512 computation and output the result into the provided buffer...
void sph_simd256_init(void *cc)
Initialize an SIMD-256 context.
static void finalize_big(void *cc, unsigned ub, unsigned n, void *dst, size_t dst_len)
#define ONE_ROUND_BIG(ri, isp, p0, p1, p2, p3)
#define WRITE_STATE_BIG(sc)
void sph_simd384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
#define STEP_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b)
static SPH_INLINE sph_u32 sph_dec32le_aligned(const void *src)
Decode a 32-bit value from the provided buffer (little endian convention).
static void encode_count_small(unsigned char *dst, u32 low, u32 high, size_t ptr, unsigned n)
static const unsigned short yoff_b_f[]
#define READ_STATE_SMALL(sc)
#define WRITE_STATE_SMALL(sc)
This structure is a context for SIMD computations: it contains the intermediate values and some data ...
void sph_simd512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
static void compress_big(sph_simd_big_context *sc, int last)
#define ONE_ROUND_SMALL(ri, isp, p0, p1, p2, p3)
static const unsigned short yoff_b_n[]
static void encode_count_big(unsigned char *dst, u32 low, u32 high, size_t ptr, unsigned n)
#define STEP_SMALL(w0, w1, w2, w3, fun, r, s, pp4b)
static const s32 alpha_tab[]
static SPH_INLINE void sph_enc32le(void *dst, sph_u32 val)
Encode a 32-bit value into the provided buffer (little endian convention).
void sph_simd256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
void sph_simd256_close(void *cc, void *dst)
Terminate the current SIMD-256 computation and output the result into the provided buffer...
static void init_small(void *cc, const u32 *iv)
static const unsigned short yoff_s_f[]
void sph_simd224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
static void finalize_small(void *cc, unsigned ub, unsigned n, void *dst, size_t dst_len)
void sph_simd384(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_simd224_init(void *cc)
Initialize an SIMD-224 context.
#define FFT256(xb, xs, rb, id)
static void update_small(void *cc, const void *data, size_t len)
void sph_simd256(void *cc, const void *data, size_t len)
Process some data bytes.
void * memcpy(void *a, const void *b, size_t c)
#define FFT128(xb, xs, rb, id)
static void compress_small(sph_simd_small_context *sc, int last)
#define FFT16(xb, xs, rb)
void sph_simd512_init(void *cc)
Initialize an SIMD-512 context.
std::string _(const char *psz)
Translation function: Call Translate signal on UI interface, which returns a boost::optional result...
static void init_big(void *cc, const u32 *iv)
static void fft64(unsigned char *x, size_t xs, s32 *q)
void sph_simd224_close(void *cc, void *dst)
Terminate the current SIMD-224 computation and output the result into the provided buffer...