#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
#define SPH_SMALL_FOOTPRINT_BMW   1
#endif

#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif

static const sph_u64 IV384[] = {
    SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F),
    SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F),
    SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F),
    SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F),
    SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F),
    SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F),
    SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F),
    SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F)
};
static const sph_u64 IV512[] = {
    SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
    SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
    SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
    SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
    SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
    SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
    SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
    SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
};
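/*
 * As can be read directly from the two tables above, the initial values
 * are simply consecutive byte values: the IV384 words pack the bytes
 * 0x00..0x7F and the IV512 words pack 0x80..0xFF, sixteen 64-bit words each.
 */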
#define XCAT(x, y)    XCAT_(x, y)
#define XCAT_(x, y)   x ## y

#define I16_16   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#define I16_17   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
#define I16_18   2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
#define I16_19   3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
#define I16_20   4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
#define I16_21   5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
#define I16_22   6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
#define I16_23   7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
#define I16_24   8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
#define I16_25   9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
#define I16_26   10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
#define I16_27   11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
#define I16_28   12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
#define I16_29   13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
#define I16_30   14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
#define I16_31   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30

#define M16_16   0, 1, 3, 4, 7, 10, 11
#define M16_17   1, 2, 4, 5, 8, 11, 12
#define M16_18   2, 3, 5, 6, 9, 12, 13
#define M16_19   3, 4, 6, 7, 10, 13, 14
#define M16_20   4, 5, 7, 8, 11, 14, 15
#define M16_21   5, 6, 8, 9, 12, 15, 16
#define M16_22   6, 7, 9, 10, 13, 0, 1
#define M16_23   7, 8, 10, 11, 14, 1, 2
#define M16_24   8, 9, 11, 12, 15, 2, 3
#define M16_25   9, 10, 12, 13, 0, 3, 4
#define M16_26   10, 11, 13, 14, 1, 4, 5
#define M16_27   11, 12, 14, 15, 2, 5, 6
#define M16_28   12, 13, 15, 16, 3, 6, 7
#define M16_29   13, 14, 0, 1, 4, 7, 8
#define M16_30   14, 15, 1, 2, 5, 8, 9
#define M16_31   15, 16, 2, 3, 6, 9, 10

#define ss0(x)   (((x) >> 1) ^ SPH_T32((x) << 3) ^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
#define ss1(x)   (((x) >> 1) ^ SPH_T32((x) << 2) ^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
#define ss2(x)   (((x) >> 2) ^ SPH_T32((x) << 1) ^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
#define ss3(x)   (((x) >> 2) ^ SPH_T32((x) << 2) ^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
#define ss4(x)   (((x) >> 1) ^ (x))
#define ss5(x)   (((x) >> 2) ^ (x))
#define rs1(x)   SPH_ROTL32(x, 3)
#define rs2(x)   SPH_ROTL32(x, 7)
#define rs3(x)   SPH_ROTL32(x, 13)
#define rs4(x)   SPH_ROTL32(x, 16)
#define rs5(x)   SPH_ROTL32(x, 19)
#define rs6(x)   SPH_ROTL32(x, 23)
#define rs7(x)   SPH_ROTL32(x, 27)

#define Ks(j)   SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))

#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
    (SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
        - SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))

#define expand1s_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
        + ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
        + ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
        + ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
        + add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand1s(qf, mf, hf, i16) \
    expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1s_(qf, mf, hf, i16, ix, iy) \
    expand1s_inner LPAR qf, mf, hf, i16, ix, iy)

#define expand2s_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
        + qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
        + qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
        + qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
        + add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand2s(qf, mf, hf, i16) \
    expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2s_(qf, mf, hf, i16, ix, iy) \
    expand2s_inner LPAR qf, mf, hf, i16, ix, iy)

#define sb0(x)   (((x) >> 1) ^ SPH_T64((x) << 3) ^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37))
#define sb1(x)   (((x) >> 1) ^ SPH_T64((x) << 2) ^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
#define sb2(x)   (((x) >> 2) ^ SPH_T64((x) << 1) ^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
#define sb3(x)   (((x) >> 2) ^ SPH_T64((x) << 2) ^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
#define sb4(x)   (((x) >> 1) ^ (x))
#define sb5(x)   (((x) >> 2) ^ (x))
#define rb1(x)   SPH_ROTL64(x, 5)
#define rb2(x)   SPH_ROTL64(x, 11)
#define rb3(x)   SPH_ROTL64(x, 27)
#define rb4(x)   SPH_ROTL64(x, 32)
#define rb5(x)   SPH_ROTL64(x, 37)
#define rb6(x)   SPH_ROTL64(x, 43)
#define rb7(x)   SPH_ROTL64(x, 53)

#define Kb(j)   SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))

#if SPH_SMALL_FOOTPRINT_BMW

static const sph_u64 Kb_tab[] = {
    Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
    Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
};
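/*
 * In the small-footprint code path, Kb_tab[] precomputes Kb(16)...Kb(31),
 * so that add_elt_b() below can fetch Kb_tab[j] (with j = i - 16) instead
 * of redoing the 64-bit multiplication in every expansion round.
 */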
#define rol_off(mf, j, off) \
    SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)

#define add_elt_b(mf, hf, j) \
    (SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
        - rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))

#define expand1b(qf, mf, hf, i) \
    SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
        + sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
        + sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
        + sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
        + sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
        + sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
        + sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
        + sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
        + add_elt_b(mf, hf, (i) - 16))

#define expand2b(qf, mf, hf, i) \
    SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
        + qf((i) - 14) + rb2(qf((i) - 13)) \
        + qf((i) - 12) + rb3(qf((i) - 11)) \
        + qf((i) - 10) + rb4(qf((i) - 9)) \
        + qf((i) - 8) + rb5(qf((i) - 7)) \
        + qf((i) - 6) + rb6(qf((i) - 5)) \
        + qf((i) - 4) + rb7(qf((i) - 3)) \
        + sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
        + add_elt_b(mf, hf, (i) - 16))

#else

#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
    (SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
        - SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))

#define expand1b_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
        + sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
        + sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
        + sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
        + add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand1b(qf, mf, hf, i16) \
    expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1b_(qf, mf, hf, i16, ix, iy) \
    expand1b_inner LPAR qf, mf, hf, i16, ix, iy)

#define expand2b_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
        + qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
        + qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
        + qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
        + add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand2b(qf, mf, hf, i16) \
    expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2b_(qf, mf, hf, i16, ix, iy) \
    expand2b_inner LPAR qf, mf, hf, i16, ix, iy)

#endif

#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
    tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
        op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))

#define Ws0    MAKE_W(SPH_T32,  5, -,  7, +, 10, +, 13, +, 14)
#define Ws1    MAKE_W(SPH_T32,  6, -,  8, +, 11, +, 14, -, 15)
#define Ws2    MAKE_W(SPH_T32,  0, +,  7, +,  9, -, 12, +, 15)
#define Ws3    MAKE_W(SPH_T32,  0, -,  1, +,  8, -, 10, +, 13)
#define Ws4    MAKE_W(SPH_T32,  1, +,  2, +,  9, -, 11, -, 14)
#define Ws5    MAKE_W(SPH_T32,  3, -,  2, +, 10, -, 12, +, 15)
#define Ws6    MAKE_W(SPH_T32,  4, -,  0, -,  3, -, 11, +, 13)
#define Ws7    MAKE_W(SPH_T32,  1, -,  4, -,  5, -, 12, -, 14)
#define Ws8    MAKE_W(SPH_T32,  2, -,  5, -,  6, +, 13, -, 15)
#define Ws9    MAKE_W(SPH_T32,  0, -,  3, +,  6, -,  7, +, 14)
#define Ws10   MAKE_W(SPH_T32,  8, -,  1, -,  4, -,  7, +, 15)
#define Ws11   MAKE_W(SPH_T32,  8, -,  0, -,  2, -,  5, +,  9)
#define Ws12   MAKE_W(SPH_T32,  1, +,  3, -,  6, -,  9, +, 10)
#define Ws13   MAKE_W(SPH_T32,  2, +,  4, +,  7, +, 10, +, 11)
#define Ws14   MAKE_W(SPH_T32,  3, -,  5, +,  8, -, 11, -, 12)
#define Ws15   MAKE_W(SPH_T32, 12, -,  4, -,  6, -,  9, +, 13)

#if SPH_SMALL_FOOTPRINT_BMW

#define MAKE_Qas   do { \
        unsigned u; \
        sph_u32 Ws[16]; \
        Ws[ 0] = Ws0; \
        Ws[ 1] = Ws1; \
        Ws[ 2] = Ws2; \
        Ws[ 3] = Ws3; \
        Ws[ 4] = Ws4; \
        Ws[ 5] = Ws5; \
        Ws[ 6] = Ws6; \
        Ws[ 7] = Ws7; \
        Ws[ 8] = Ws8; \
        Ws[ 9] = Ws9; \
        Ws[10] = Ws10; \
        Ws[11] = Ws11; \
        Ws[12] = Ws12; \
        Ws[13] = Ws13; \
        Ws[14] = Ws14; \
        Ws[15] = Ws15; \
        for (u = 0; u < 15; u += 5) { \
            qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
            qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
            qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
            qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
            qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
        } \
        qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
    } while (0)

#define MAKE_Qbs   do { \
        qt[16] = expand1s(Qs, M, H, 16); \
        qt[17] = expand1s(Qs, M, H, 17); \
        qt[18] = expand2s(Qs, M, H, 18); \
        qt[19] = expand2s(Qs, M, H, 19); \
        qt[20] = expand2s(Qs, M, H, 20); \
        qt[21] = expand2s(Qs, M, H, 21); \
        qt[22] = expand2s(Qs, M, H, 22); \
        qt[23] = expand2s(Qs, M, H, 23); \
        qt[24] = expand2s(Qs, M, H, 24); \
        qt[25] = expand2s(Qs, M, H, 25); \
        qt[26] = expand2s(Qs, M, H, 26); \
        qt[27] = expand2s(Qs, M, H, 27); \
        qt[28] = expand2s(Qs, M, H, 28); \
        qt[29] = expand2s(Qs, M, H, 29); \
        qt[30] = expand2s(Qs, M, H, 30); \
        qt[31] = expand2s(Qs, M, H, 31); \
    } while (0)

#else

#define MAKE_Qas   do { \
        qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
        qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
        qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
        qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
        qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
        qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
        qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
        qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
        qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
        qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
        qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
        qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
        qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
        qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
        qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
        qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
    } while (0)

#define MAKE_Qbs   do { \
        qt[16] = expand1s(Qs, M, H, 16); \
        qt[17] = expand1s(Qs, M, H, 17); \
        qt[18] = expand2s(Qs, M, H, 18); \
        qt[19] = expand2s(Qs, M, H, 19); \
        qt[20] = expand2s(Qs, M, H, 20); \
        qt[21] = expand2s(Qs, M, H, 21); \
        qt[22] = expand2s(Qs, M, H, 22); \
        qt[23] = expand2s(Qs, M, H, 23); \
        qt[24] = expand2s(Qs, M, H, 24); \
        qt[25] = expand2s(Qs, M, H, 25); \
        qt[26] = expand2s(Qs, M, H, 26); \
        qt[27] = expand2s(Qs, M, H, 27); \
        qt[28] = expand2s(Qs, M, H, 28); \
        qt[29] = expand2s(Qs, M, H, 29); \
        qt[30] = expand2s(Qs, M, H, 30); \
        qt[31] = expand2s(Qs, M, H, 31); \
    } while (0)

#endif

#define MAKE_Qs   do { \
        MAKE_Qas; \
        MAKE_Qbs; \
    } while (0)

#define Qs(j)   (qt[j])

#define Wb0    MAKE_W(SPH_T64,  5, -,  7, +, 10, +, 13, +, 14)
#define Wb1    MAKE_W(SPH_T64,  6, -,  8, +, 11, +, 14, -, 15)
#define Wb2    MAKE_W(SPH_T64,  0, +,  7, +,  9, -, 12, +, 15)
#define Wb3    MAKE_W(SPH_T64,  0, -,  1, +,  8, -, 10, +, 13)
#define Wb4    MAKE_W(SPH_T64,  1, +,  2, +,  9, -, 11, -, 14)
#define Wb5    MAKE_W(SPH_T64,  3, -,  2, +, 10, -, 12, +, 15)
#define Wb6    MAKE_W(SPH_T64,  4, -,  0, -,  3, -, 11, +, 13)
#define Wb7    MAKE_W(SPH_T64,  1, -,  4, -,  5, -, 12, -, 14)
#define Wb8    MAKE_W(SPH_T64,  2, -,  5, -,  6, +, 13, -, 15)
#define Wb9    MAKE_W(SPH_T64,  0, -,  3, +,  6, -,  7, +, 14)
#define Wb10   MAKE_W(SPH_T64,  8, -,  1, -,  4, -,  7, +, 15)
#define Wb11   MAKE_W(SPH_T64,  8, -,  0, -,  2, -,  5, +,  9)
#define Wb12   MAKE_W(SPH_T64,  1, +,  3, -,  6, -,  9, +, 10)
#define Wb13   MAKE_W(SPH_T64,  2, +,  4, +,  7, +, 10, +, 11)
#define Wb14   MAKE_W(SPH_T64,  3, -,  5, +,  8, -, 11, -, 12)
#define Wb15   MAKE_W(SPH_T64, 12, -,  4, -,  6, -,  9, +, 13)

#if SPH_SMALL_FOOTPRINT_BMW

#define MAKE_Qab   do { \
        unsigned u; \
        sph_u64 Wb[16]; \
        Wb[ 0] = Wb0; \
        Wb[ 1] = Wb1; \
        Wb[ 2] = Wb2; \
        Wb[ 3] = Wb3; \
        Wb[ 4] = Wb4; \
        Wb[ 5] = Wb5; \
        Wb[ 6] = Wb6; \
        Wb[ 7] = Wb7; \
        Wb[ 8] = Wb8; \
        Wb[ 9] = Wb9; \
        Wb[10] = Wb10; \
        Wb[11] = Wb11; \
        Wb[12] = Wb12; \
        Wb[13] = Wb13; \
        Wb[14] = Wb14; \
        Wb[15] = Wb15; \
        for (u = 0; u < 15; u += 5) { \
            qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
            qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
            qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
            qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
            qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
        } \
        qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
    } while (0)

#define MAKE_Qbb   do { \
        unsigned u; \
        for (u = 16; u < 18; u ++) \
            qt[u] = expand1b(Qb, M, H, u); \
        for (u = 18; u < 32; u ++) \
            qt[u] = expand2b(Qb, M, H, u); \
    } while (0)

#else

#define MAKE_Qab   do { \
        qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
        qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
        qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
        qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
        qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
        qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
        qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
        qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
        qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
        qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
        qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
        qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
        qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
        qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
        qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
        qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
    } while (0)

#define MAKE_Qbb   do { \
        qt[16] = expand1b(Qb, M, H, 16); \
        qt[17] = expand1b(Qb, M, H, 17); \
        qt[18] = expand2b(Qb, M, H, 18); \
        qt[19] = expand2b(Qb, M, H, 19); \
        qt[20] = expand2b(Qb, M, H, 20); \
        qt[21] = expand2b(Qb, M, H, 21); \
        qt[22] = expand2b(Qb, M, H, 22); \
        qt[23] = expand2b(Qb, M, H, 23); \
        qt[24] = expand2b(Qb, M, H, 24); \
        qt[25] = expand2b(Qb, M, H, 25); \
        qt[26] = expand2b(Qb, M, H, 26); \
        qt[27] = expand2b(Qb, M, H, 27); \
        qt[28] = expand2b(Qb, M, H, 28); \
        qt[29] = expand2b(Qb, M, H, 29); \
        qt[30] = expand2b(Qb, M, H, 30); \
        qt[31] = expand2b(Qb, M, H, 31); \
    } while (0)

#endif

#define MAKE_Qb   do { \
        MAKE_Qab; \
        MAKE_Qbb; \
    } while (0)

#define Qb(j)   (qt[j])

#define FOLD(type, mkQ, tt, rol, mf, qf, dhf)   do { \
        type qt[32], xl, xh; \
        mkQ; \
        xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
            ^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
        xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
            ^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
        dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) + (xl ^ qf(24) ^ qf( 0))); \
        dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) + (xl ^ qf(25) ^ qf( 1))); \
        dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) + (xl ^ qf(26) ^ qf( 2))); \
        dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) + (xl ^ qf(27) ^ qf( 3))); \
        dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) + (xl ^ qf(28) ^ qf( 4))); \
        dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) + (xl ^ qf(29) ^ qf( 5))); \
        dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) + (xl ^ qf(30) ^ qf( 6))); \
        dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) + (xl ^ qf(31) ^ qf( 7))); \
        dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) + ((xl << 8) ^ qf(23) ^ qf( 8))); \
        dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) + ((xl >> 6) ^ qf(16) ^ qf( 9))); \
        dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) + ((xl << 6) ^ qf(17) ^ qf(10))); \
        dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) + ((xl << 4) ^ qf(18) ^ qf(11))); \
        dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) + ((xl >> 3) ^ qf(19) ^ qf(12))); \
        dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) + ((xl >> 4) ^ qf(20) ^ qf(13))); \
        dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) + ((xl >> 7) ^ qf(21) ^ qf(14))); \
        dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) + ((xl >> 2) ^ qf(22) ^ qf(15))); \
    } while (0)

#define FOLDs   FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)

#define FOLDb   FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)

/* within compress_small() */
#define M(x)    sph_dec32le_aligned(data + 4 * (x))
    /* ... */
#define dH(x)   (dh[x])
    /* ... */

/* within bmw32() */
    sc->bit_count += (sph_u64)len << 3;
    /* ... */
    clen = (sizeof sc->buf) - ptr;
    /* ... */
    memcpy(buf + ptr, data, clen);
    data = (const unsigned char *)data + clen;
    /* ... */
    if (ptr == sizeof sc->buf) {
        /* ... */
    }
    /* ... */
}
static void
bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
    void *dst, size_t out_size_w32)
{
    unsigned char *buf, *out;
    /* ... */
    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
    /* ... */
    if (ptr > (sizeof sc->buf) - 8) {
        memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
        /* ... */
    }
    memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
    sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
        SPH_T64(sc->bit_count + n));
    /* ... */
    for (u = 0; u < 16; u ++)
        sph_enc32le_aligned(buf + 4 * u, h2[u]);
    /* ... */
    for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
        sph_enc32le(out + 4 * u, h1[v]);
}
static void
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
{
#if SPH_LITTLE_FAST
#define M(x)    sph_dec64le_aligned(data + 8 * (x))
#else
    sph_u64 mv[16];

    mv[ 0] = sph_dec64le_aligned(data +   0);
    mv[ 1] = sph_dec64le_aligned(data +   8);
    mv[ 2] = sph_dec64le_aligned(data +  16);
    mv[ 3] = sph_dec64le_aligned(data +  24);
    mv[ 4] = sph_dec64le_aligned(data +  32);
    mv[ 5] = sph_dec64le_aligned(data +  40);
    mv[ 6] = sph_dec64le_aligned(data +  48);
    mv[ 7] = sph_dec64le_aligned(data +  56);
    mv[ 8] = sph_dec64le_aligned(data +  64);
    mv[ 9] = sph_dec64le_aligned(data +  72);
    mv[10] = sph_dec64le_aligned(data +  80);
    mv[11] = sph_dec64le_aligned(data +  88);
    mv[12] = sph_dec64le_aligned(data +  96);
    mv[13] = sph_dec64le_aligned(data + 104);
    mv[14] = sph_dec64le_aligned(data + 112);
    mv[15] = sph_dec64le_aligned(data + 120);
#define M(x)    (mv[x])
#endif
#define dH(x)   (dh[x])
    /* ... */

static const sph_u64 final_b[16] = {
    SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
    SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
    SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
    SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
    SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
    SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
    SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
    SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
};
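/*
 * final_b holds the constant words 0xaaaa...aaa0 + i used by the BMW output
 * transformation: the finalization runs one extra compression in which this
 * vector plays the role of the chaining value while the current state is
 * fed in as the message block, and the digest is taken from the result.
 */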
static void
bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv)
{
    memcpy(sc->H, iv, sizeof sc->H);
    /* ... */
}
static void
bmw64(sph_bmw_big_context *sc, const void *data, size_t len)
{
    /* ... */
    sc->bit_count += (sph_u64)len << 3;
    /* ... */
    clen = (sizeof sc->buf) - ptr;
    /* ... */
    memcpy(buf + ptr, data, clen);
    data = (const unsigned char *)data + clen;
    /* ... */
    if (ptr == sizeof sc->buf) {
        /* ... */
    }
    /* ... */
    memcpy(sc->H, h1, sizeof sc->H);
}
static void
bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
    void *dst, size_t out_size_w64)
{
    unsigned char *buf, *out;
    /* ... */
    sph_u64 h1[16], h2[16], *h;
    /* ... */
    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
    /* ... */
    if (ptr > (sizeof sc->buf) - 8) {
        memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
        /* ... */
    }
    memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
    sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
        SPH_T64(sc->bit_count + n));
    /* ... */
    for (u = 0; u < 16; u ++)
        sph_enc64le_aligned(buf + 8 * u, h2[u]);
    /* ... */
    for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)
        sph_enc64le(out + 8 * u, h1[v]);
}
void
sph_bmw224(void *cc, const void *data, size_t len)
{
    bmw32(cc, data, len);
}

/* ... */

void
sph_bmw256(void *cc, const void *data, size_t len)
{
    bmw32(cc, data, len);
}

/* ... */
void
sph_bmw384_init(void *cc)
{
    bmw64_init(cc, IV384);
}

void
sph_bmw384(void *cc, const void *data, size_t len)
{
    bmw64(cc, data, len);
}

void
sph_bmw384_close(void *cc, void *dst)
{
    sph_bmw384_addbits_and_close(cc, 0, 0, dst);
}

void
sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    bmw64_close(cc, ub, n, dst, 6);
}

void
sph_bmw512_init(void *cc)
{
    bmw64_init(cc, IV512);
}

void
sph_bmw512(void *cc, const void *data, size_t len)
{
    bmw64(cc, data, len);
}

void
sph_bmw512_close(void *cc, void *dst)
{
    sph_bmw512_addbits_and_close(cc, 0, 0, dst);
}

void
sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    bmw64_close(cc, ub, n, dst, 8);
}
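/*
 * Usage sketch (illustrative only, hence the #if 0): computing a BMW-512
 * digest with the public API above. sph_bmw512_context is the context type
 * declared in sph_bmw.h; the digest is 64 bytes.
 */
#if 0
#include <stdio.h>

#include "sph_bmw.h"

static void
print_bmw512(const void *msg, size_t len)
{
    sph_bmw512_context cc;
    unsigned char digest[64];
    size_t u;

    sph_bmw512_init(&cc);
    sph_bmw512(&cc, msg, len);      /* may be called repeatedly to stream input */
    sph_bmw512_close(&cc, digest);  /* pads, finalizes and writes the digest */
    for (u = 0; u < sizeof digest; u ++)
        printf("%02x", digest[u]);
    printf("\n");
}
#endif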