MINOR: ist: add ist_find_ctl()
This new function looks for the first control character in a string (a
char whose value is between 0x00 and 0x1F included) and returns it, or
NULL if there is none. It is optimized for quickly evicting non-matching
strings and scans ~0.43 bytes per cycle. It can be used as an accelerator
when it's needed to look up several of these characters (e.g. CR/LF/NUL).
diff --git a/include/common/ist.h b/include/common/ist.h
index 32a11ed..10c0b6c 100644
--- a/include/common/ist.h
+++ b/include/common/ist.h
@@ -606,6 +606,47 @@
return s - 1;
}
+/* Returns a pointer to the first control character found in <ist>, or NULL if
+ * none is present. A control character is defined as a byte whose value is
+ * between 0x00 and 0x1F included. The function is optimized for strings having
+ * no CTL chars by processing up to sizeof(long) bytes at once on architectures
+ * supporting efficient unaligned accesses. Despite this it is not very fast
+ * (~0.43 byte/cycle) and should mostly be used on low match probability when
+ * it can save a call to a much slower function.
+ */
+static inline const char *ist_find_ctl(const struct ist ist)
+{
+ const union { unsigned long v; } __attribute__((packed)) *u;
+ const char *curr = (void *)ist.ptr - sizeof(long);
+ const char *last = curr + ist.len;
+ unsigned long l1, l2;
+
+ do {
+ curr += sizeof(long);
+ if (curr > last)
+ break;
+ u = (void *)curr;
+ /* subtract 0x202020...20 to the value to generate a carry in
+ * the lower byte if the byte contains a lower value. If we
+ * generate a bit 7 that was not there, it means the byte was
+ * within 0x00..0x1F.
+ */
+ l2 = u->v;
+ l1 = ~l2 & ((~0UL / 255) * 0x80); /* 0x808080...80 */
+ l2 -= (~0UL / 255) * 0x20; /* 0x202020...20 */
+ } while ((l1 & l2) == 0);
+
+ last += sizeof(long);
+ if (__builtin_expect(curr < last, 0)) {
+ do {
+ if ((uint8_t)*curr < 0x20)
+ return curr;
+ curr++;
+ } while (curr < last);
+ }
+ return NULL;
+}
+
/* looks for first occurrence of character <chr> in string <ist> and returns
* the tail of the string starting with this character, or (ist.end,0) if not
* found.