Willy Tarreau | aea4635 | 2020-06-01 11:48:35 +0200 | [diff] [blame] | 1 | /* |
| 2 | * include/haproxy/intops.h |
| 3 | * Functions for integer operations. |
| 4 | * |
| 5 | * Copyright (C) 2020 Willy Tarreau - w@1wt.eu |
| 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation, version 2.1 |
| 10 | * exclusively. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with this library; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #ifndef _HAPROXY_INTOPS_H |
| 23 | #define _HAPROXY_INTOPS_H |
| 24 | |
| 25 | #include <haproxy/api.h> |
| 26 | |
| 27 | /* exported functions, mostly integer parsing */ |
| 28 | /* rounds <i> down to the closest value having max 2 digits */ |
| 29 | unsigned int round_2dig(unsigned int i); |
| 30 | unsigned int full_hash(unsigned int a); |
| 31 | int varint_bytes(uint64_t v); |
| 32 | unsigned int read_uint(const char **s, const char *end); |
| 33 | long long read_int64(const char **s, const char *end); |
| 34 | unsigned long long read_uint64(const char **s, const char *end); |
| 35 | unsigned int str2ui(const char *s); |
| 36 | unsigned int str2uic(const char *s); |
| 37 | unsigned int strl2ui(const char *s, int len); |
| 38 | unsigned int strl2uic(const char *s, int len); |
| 39 | int strl2ic(const char *s, int len); |
| 40 | int strl2irc(const char *s, int len, int *ret); |
| 41 | int strl2llrc(const char *s, int len, long long *ret); |
| 42 | int strl2llrc_dotted(const char *text, int len, long long *ret); |
| 43 | unsigned int mask_find_rank_bit(unsigned int r, unsigned long m); |
| 44 | unsigned int mask_find_rank_bit_fast(unsigned int r, unsigned long m, |
| 45 | unsigned long a, unsigned long b, |
| 46 | unsigned long c, unsigned long d); |
| 47 | void mask_prep_rank_map(unsigned long m, |
| 48 | unsigned long *a, unsigned long *b, |
| 49 | unsigned long *c, unsigned long *d); |
| 50 | |
| 51 | |
/* Multiply the two 32-bit operands and shift the 64-bit result right 32 bits.
 * This is used to compute fixed ratios by setting one of the operands to
 * (2^32*ratio). The extra "+ a" improves the rounding: multiplying by the
 * maximum ratio ~0U (i.e. 2^32 - 1) then returns <a> unchanged instead of
 * a - 1.
 */
static inline unsigned int mul32hi(unsigned int a, unsigned int b)
{
	unsigned long long product = (unsigned long long)a * b + a;

	return product >> 32;
}
| 60 | |
/* gcc does not know when it can safely divide 64 bits by 32 bits. Use this
 * function when you know for sure that the result fits in 32 bits, because
 * it is optimal on x86 and on 64bit processors.
 * NOTE(review): on i386 this expands to a bare "divl", which raises a CPU
 * divide error if the quotient does not fit in 32 bits; o2 == 0 is a
 * division by zero in both paths. Callers must guarantee neither happens.
 */
static inline unsigned int div64_32(unsigned long long o1, unsigned int o2)
{
	unsigned long long result;
#ifdef __i386__
	/* the "A" constraint ties the 64-bit dividend and result to the
	 * edx:eax register pair; divl leaves the quotient in eax.
	 */
	asm("divl %2"
	    : "=A" (result)
	    : "A"(o1), "rm"(o2));
#else
	result = o1 / o2;
#endif
	return result;
}
| 77 | |
/* Rotate left a 64-bit integer <v> by <bits> bits. Only the 6 low bits of
 * <bits> are meaningful (0-63); on x86_64/armv8 the masking is omitted so
 * that the compiler recognizes the pattern and emits a single rotate
 * instruction, hence callers must not pass values >= 64 there.
 */
static inline uint64_t rotl64(uint64_t v, uint8_t bits)
{
#if !defined(__ARM_ARCH_8A) && !defined(__x86_64__)
	bits &= 63;
#endif
	/* "(-bits & 63)" yields the complementary shift while remaining
	 * defined (a shift by 0) when bits == 0.
	 */
	return (v << bits) | (v >> (-bits & 63));
}
| 87 | |
/* Rotate right a 64-bit integer <v> by <bits> bits. Only the 6 low bits of
 * <bits> are meaningful (0-63); on x86_64/armv8 the masking is omitted so
 * that the compiler recognizes the pattern and emits a single rotate
 * instruction, hence callers must not pass values >= 64 there.
 */
static inline uint64_t rotr64(uint64_t v, uint8_t bits)
{
#if !defined(__ARM_ARCH_8A) && !defined(__x86_64__)
	bits &= 63;
#endif
	/* "(-bits & 63)" yields the complementary shift while remaining
	 * defined (a shift by 0) when bits == 0.
	 */
	return (v >> bits) | (v << (-bits & 63));
}
| 97 | |
/* Simple popcountl implementation. It returns the number of ones in a word.
 * This is the classic SWAR reduction described at
 * https://graphics.stanford.edu/~seander/bithacks.html : bits are summed
 * within 2-, 4- then 8-bit fields, and the per-byte counts are folded into
 * the top byte by a multiply.
 */
static inline unsigned int my_popcountl(unsigned long a)
{
	const unsigned long m1  = ~0UL / 3;        /* 0x5555... */
	const unsigned long m2  = ~0UL / 15 * 3;   /* 0x3333... */
	const unsigned long m4  = ~0UL / 255 * 15; /* 0x0f0f... */
	const unsigned long h01 = ~0UL / 255;      /* 0x0101... */

	a -= (a >> 1) & m1;                 /* 2-bit sums */
	a = (a & m2) + ((a >> 2) & m2);     /* 4-bit sums */
	a = (a + (a >> 4)) & m4;            /* 8-bit sums */
	/* the multiply accumulates all byte counts into the top byte */
	return (unsigned long)(a * h01) >> (sizeof(unsigned long) - 1) * 8;
}
| 108 | |
/* returns non-zero if <a> has at least 2 bits set */
static inline unsigned long atleast2(unsigned long a)
{
	/* clearing the lowest set bit leaves a non-zero value only when at
	 * least one other bit was set.
	 */
	unsigned long lowest_cleared = a & (a - 1);

	return lowest_cleared;
}
| 114 | |
/* Simple ffs implementation. It returns the position of the lowest bit set
 * to one, starting at 1. It is illegal to call it with a==0 (undefined
 * result).
 */
static inline unsigned int my_ffsl(unsigned long a)
{
	unsigned long pos;

#if defined(__x86_64__)
	/* bsf yields the 0-based index of the lowest set bit */
	__asm__("bsf %1,%0\n" : "=r" (pos) : "rm" (a));
	pos++;
#else
	/* binary search: whenever the low half of the remaining word is
	 * empty, drop it and account for the skipped width.
	 */
	pos = 1;
#if LONG_MAX > 0x7FFFFFFFL /* 64-bit longs */
	if (!(a & 0xffffffffUL)) {
		a >>= 32;
		pos += 32;
	}
#endif
	if (!(a & 0xffffUL)) {
		a >>= 16;
		pos += 16;
	}
	if (!(a & 0xffUL)) {
		a >>= 8;
		pos += 8;
	}
	if (!(a & 0xfUL)) {
		a >>= 4;
		pos += 4;
	}
	if (!(a & 0x3UL)) {
		a >>= 2;
		pos += 2;
	}
	if (!(a & 0x1UL))
		pos += 1;
#endif /* x86_64 */

	return pos;
}
| 157 | |
/* Simple fls implementation. It returns the position of the highest bit set
 * to one, starting at 1. It is illegal to call it with a==0 (undefined
 * result).
 */
static inline unsigned int my_flsl(unsigned long a)
{
	unsigned long pos;

#if defined(__x86_64__)
	/* bsr yields the 0-based index of the highest set bit */
	__asm__("bsr %1,%0\n" : "=r" (pos) : "rm" (a));
	pos++;
#else
	/* binary search: whenever the high half of the remaining word holds
	 * a bit, keep it and account for the skipped width.
	 */
	pos = 1;
#if LONG_MAX > 0x7FFFFFFFUL /* 64-bit longs */
	if (a & 0xffffffff00000000UL) {
		a >>= 32;
		pos += 32;
	}
#endif
	if (a & 0xffff0000UL) {
		a >>= 16;
		pos += 16;
	}
	if (a & 0xff00UL) {
		a >>= 8;
		pos += 8;
	}
	if (a & 0xf0UL) {
		a >>= 4;
		pos += 4;
	}
	if (a & 0xcUL) {
		a >>= 2;
		pos += 2;
	}
	if (a & 0x2UL)
		pos += 1;
#endif /* x86_64 */

	return pos;
}
| 200 | |
/* Build a word with the <bits> lower bits set (reverse of my_popcountl).
 * Values <= 0 return 0.
 */
static inline unsigned long nbits(int bits)
{
	/* "(2UL << (bits - 1)) - 1" sets exactly <bits> low bits and, unlike
	 * "(1UL << bits) - 1", stays well-defined when <bits> equals the
	 * word width.
	 */
	return (bits <= 0) ? 0 : (2UL << (bits - 1)) - 1;
}
| 209 | |
/* Turns 64-bit value <a> from host byte order to network byte order.
 * On x86_64 a single bswap instruction does the whole job. Elsewhere the
 * value is split through a union into two 32-bit halves, each converted
 * with htonl(), and re-assembled in swapped positions; a decent compiler
 * reduces this to a couple of bswap/rev instructions plus shifts on little
 * endian, and optimizes the whole function out on big endian.
 */
static inline unsigned long long my_htonll(unsigned long long a)
{
#if defined(__x86_64__)
	__asm__ volatile("bswapq %0" : "=r"(a) : "0"(a));
	return a;
#else
	union {
		struct {
			unsigned int w1;
			unsigned int w2;
		} by32;
		unsigned long long by64;
	} conv = { .by64 = a };

	return ((unsigned long long)htonl(conv.by32.w1) << 32) | htonl(conv.by32.w2);
#endif
}
| 236 | |
/* Turns 64-bit value <a> from network byte order to host byte order.
 * Byte swapping is an involution, so this is simply my_htonll() applied
 * again: on little endian it swaps back, on big endian both are no-ops.
 */
static inline unsigned long long my_ntohll(unsigned long long a)
{
	return my_htonll(a);
}
| 242 | |
/* sets bit <bit> into map <map>, which must be long-aligned */
static inline void ha_bit_set(unsigned long bit, long *map)
{
	const unsigned long word_bits = 8 * sizeof(*map);

	map[bit / word_bits] |= 1UL << (bit % word_bits);
}
| 248 | |
/* clears bit <bit> from map <map>, which must be long-aligned */
static inline void ha_bit_clr(unsigned long bit, long *map)
{
	const unsigned long word_bits = 8 * sizeof(*map);

	map[bit / word_bits] &= ~(1UL << (bit % word_bits));
}
| 254 | |
/* flips bit <bit> from map <map>, which must be long-aligned */
static inline void ha_bit_flip(unsigned long bit, long *map)
{
	const unsigned long word_bits = 8 * sizeof(*map);

	map[bit / word_bits] ^= 1UL << (bit % word_bits);
}
| 260 | |
/* returns non-zero if bit <bit> from map <map> is set, otherwise 0 */
static inline int ha_bit_test(unsigned long bit, const long *map)
{
	const unsigned long word_bits = 8 * sizeof(*map);

	/* "!!" normalizes the masked word to a strict 0/1 result */
	return !!(map[bit / word_bits] & 1UL << (bit % word_bits));
}
| 266 | |
/* hash a 32-bit integer to another 32-bit integer. This code may be large
 * when inlined, use full_hash() instead.
 */
static inline unsigned int __full_hash(unsigned int a)
{
	/* Bob Jenkins' full-avalanche integer hash: each step mixes the word
	 * with a constant and a shifted copy of itself so that small input
	 * variations spread over the whole 32-bit space, which makes a
	 * randomly picked number fall equally before any server position.
	 * Check http://burtleburtle.net/bob/hash/integer.html for more info.
	 */
	a += 0x7ed55d16 + (a << 12);
	a ^= 0xc761c23c ^ (a >> 19);
	a += 0x165667b1 + (a << 5);
	a  = (a + 0xd3a2646c) ^ (a << 9);
	a += 0xfd7046c5 + (a << 3);
	a ^= 0xb55a4f09 ^ (a >> 16);

	/* ensure values are better spread all around the tree by multiplying
	 * by a large prime close to 3/4 of the tree.
	 */
	return a * 3221225473U;
}
| 291 | |
/*
 * Return integer equivalent of character <c> for a hex digit (0-9, a-f, A-F),
 * otherwise -1. This compact form helps gcc produce efficient code.
 */
static inline int hex2i(int c)
{
	/* after "c -= '0'", digits map to 0..9; the unsigned char cast turns
	 * any value below '0' into a large positive one, so a single unsigned
	 * compare rejects both sides of the digit range at once.
	 */
	if ((unsigned char)(c -= '0') > 9) {
		/* successively rebase on 'A' then 'a': a letter in A-F or a-f
		 * ends up in 0..5 (the second test being short-circuited for
		 * uppercase). If both tests fail, c is forced to -11 so that
		 * the final "+ 10" yields the -1 error value; for letters it
		 * turns 0..5 into the expected 10..15.
		 */
		if ((unsigned char)(c -= 'A' - '0') > 5 &&
		    (unsigned char)(c -= 'a' - 'A') > 5)
			c = -11;
		c += 10;
	}
	return c;
}
| 306 | |
/* This one is 6 times faster than strtoul() on athlon, but does
 * no check at all: every byte up to the terminating NUL is taken as a
 * decimal digit.
 */
static inline unsigned int __str2ui(const char *s)
{
	unsigned int result = 0;

	for (; *s; s++)
		result = result * 10 + ((unsigned char)*s - '0');
	return result;
}
| 319 | |
/* This one is 5 times faster than strtoul() on athlon with checks.
 * It returns the value of the number composed of all valid digits read,
 * stopping at the first non-digit (including the terminating NUL).
 */
static inline unsigned int __str2uic(const char *s)
{
	unsigned int result = 0;
	unsigned int digit;

	/* the unsigned subtraction makes any non-digit (NUL included)
	 * compare above 9, ending the loop.
	 */
	while ((digit = (unsigned int)((*s++) - '0')) <= 9)
		result = result * 10 + digit;
	return result;
}
| 337 | |
/* This one is 28 times faster than strtoul() on athlon, but does
 * no check at all: every one of the <len> bytes is taken as a decimal
 * digit.
 */
static inline unsigned int __strl2ui(const char *s, int len)
{
	unsigned int result = 0;

	for (; len > 0; len--)
		result = result * 10 + ((unsigned char)*s++ - '0');
	return result;
}
| 351 | |
/* This one is 7 times faster than strtoul() on athlon with checks.
 * It returns the value of the number composed of all valid digits read,
 * stopping at the first non-digit or after <len> bytes.
 */
static inline unsigned int __strl2uic(const char *s, int len)
{
	unsigned int result = 0;

	while (len-- > 0) {
		/* unsigned subtraction: any non-digit compares above 9 */
		unsigned int digit = (*s++) - '0';

		if (digit > 9)
			break;
		result = result * 10 + digit;
	}
	return result;
}
| 369 | |
/* This function reads an unsigned integer from the string pointed to by <s>
 * and returns it. The <s> pointer is adjusted to point to the first unread
 * char (first non-digit or <end>). The function automatically stops at <end>.
 */
static inline unsigned int __read_uint(const char **s, const char *end)
{
	const char *cur = *s;
	unsigned int result = 0;

	while (cur < end) {
		/* unsigned subtraction: any non-digit compares above 9 */
		unsigned int digit = *cur - '0';

		if (digit > 9)
			break;
		result = result * 10 + digit;
		cur++;
	}
	*s = cur;
	return result;
}
| 391 | |
/* returns the number of bytes needed to encode <v> as a varint. Be careful,
 * use it only with constants as it generates a large code (typ. 180 bytes).
 * Use the varint_bytes() version instead in case of doubt.
 * Each threshold below is the largest value fitting in that many bytes:
 * 1 byte carries values up to 0xef, then each extra continuation byte adds
 * 7 bits on top of the previous limit.
 */
static inline int __varint_bytes(uint64_t v)
{
	if (v <= 0x00000000000000efULL) return 1;
	if (v <= 0x00000000000008efULL) return 2;
	if (v <= 0x00000000000408efULL) return 3;
	if (v <= 0x00000000020408efULL) return 4;
	if (v <= 0x00000001020408efULL) return 5;
	if (v <= 0x00000081020408efULL) return 6;
	if (v <= 0x00004081020408efULL) return 7;
	if (v <= 0x00204081020408efULL) return 8;
	if (v <= 0x10204081020408efULL) return 9;
	return 10;
}
| 411 | |
/* Encode the integer <i> into a varint (variable-length integer). The encoded
 * value is copied in <*buf>. Here is the encoding format:
 *
 *        0 <= X < 240        : 1 byte  (7.875 bits)  [ XXXX XXXX ]
 *      240 <= X < 2288       : 2 bytes (11 bits)     [ 1111 XXXX ] [ 0XXX XXXX ]
 *     2288 <= X < 264432     : 3 bytes (18 bits)     [ 1111 XXXX ] [ 1XXX XXXX ] [ 0XXX XXXX ]
 *   264432 <= X < 33818864   : 4 bytes (25 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]*2 [ 0XXX XXXX ]
 * 33818864 <= X < 4328786160 : 5 bytes (32 bits)     [ 1111 XXXX ] [ 1XXX XXXX ]*3 [ 0XXX XXXX ]
 * ...
 *
 * On success, it returns the number of written bytes and <*buf> is moved
 * after the encoded value. Otherwise (not enough room before <end>), it
 * returns -1 and <*buf> is left untouched.
 */
static inline int encode_varint(uint64_t i, char **buf, char *end)
{
	unsigned char *out = (unsigned char *)*buf;
	unsigned char *stop = (unsigned char *)end;
	int written;

	if (out >= stop)
		return -1;

	/* small values fit as-is in a single byte */
	if (i < 240) {
		*out++ = i;
		*buf = (char *)out;
		return 1;
	}

	/* first byte: low 4 bits of the value with the 4 high marker bits set */
	*out++ = (unsigned char)i | 240;
	i = (i - 240) >> 4;
	/* continuation bytes carry 7 bits each with the top bit set */
	while (i >= 128) {
		if (out >= stop)
			return -1;
		*out++ = (unsigned char)i | 128;
		i = (i - 128) >> 7;
	}

	/* last byte: top bit clear terminates the sequence */
	if (out >= stop)
		return -1;
	*out++ = (unsigned char)i;

	written = (char *)out - *buf;
	*buf = (char *)out;
	return written;
}
| 455 | |
/* Decode a varint from <*buf> and save the decoded value in <*i>. See
 * encode_varint() above for details about the encoding format.
 * On success, it returns the number of read bytes and <*buf> is moved after
 * the varint. Otherwise (truncated input), it returns -1 and <*buf> is left
 * untouched.
 */
static inline int decode_varint(char **buf, char *end, uint64_t *i)
{
	unsigned char *in = (unsigned char *)*buf;
	int shift, len;

	if (in >= (unsigned char *)end)
		return -1;

	/* a first byte below 240 is the value itself */
	*i = *in++;
	if (*i < 240) {
		*buf = (char *)in;
		return 1;
	}

	/* continuation bytes add 7 bits each, starting 4 bits above the
	 * low nibble already taken from the first byte; a byte with the
	 * top bit clear terminates the sequence.
	 */
	shift = 4;
	do {
		if (in >= (unsigned char *)end)
			return -1;
		*i += (uint64_t)*in << shift;
		shift += 7;
	} while (*in++ >= 128);

	len = (char *)in - *buf;
	*buf = (char *)in;
	return len;
}
| 486 | |
| 487 | #endif /* _HAPROXY_INTOPS_H */ |
| 488 | |
| 489 | /* |
| 490 | * Local variables: |
| 491 | * c-indent-level: 8 |
| 492 | * c-basic-offset: 8 |
| 493 | * End: |
| 494 | */ |