MINOR: tools: add the ability to update a word fingerprint
Instead of making a new one from scratch, let's support updating an existing
fingerprint without wiping it, and doing the same char by char. The
word-by-word variant will still result in multiple beginnings and ends, but
that accurately reflects word boundaries. The char-based variant is more
flexible but requires the caller to keep the previous char in order to
indicate the transition, which also makes it possible to insert delimiters,
for example.
diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h
index 56948ea..901dca0 100644
--- a/include/haproxy/tools.h
+++ b/include/haproxy/tools.h
@@ -865,6 +865,7 @@
char *env_expand(char *in);
uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, char **errptr);
size_t sanitize_for_printing(char *line, size_t pos, size_t width);
+void update_word_fingerprint(uint8_t *fp, const char *word);
void make_word_fingerprint(uint8_t *fp, const char *word);
int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2);
@@ -1072,5 +1073,33 @@
return statistical_prng_state = x;
}
+/* Update array <fp> with the character transition <prev> to <curr>. If <prev>
+ * is zero, it's assumed that <curr> is the first character. If <curr> is zero
+ * it's assumed to mark the end. Both may be zero. <fp> is a 1024-entries array
+ * indexed as 32*from+to. Positions for 'from' and 'to' are:
+ *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ * Letters are case-insensitive. Each entry is an 8-bit counter which is simply
+ * incremented, so it wraps after 255 identical transitions.
+ */
+static inline void update_char_fingerprint(uint8_t *fp, char prev, char curr)
+{
+	int from, to;
+
+	/* a zero <prev> must land in the "begin" slot (28), not the digit one */
+	switch (prev) {
+	case 0:         from = 28; break; // begin
+	case 'a'...'z': from = prev - 'a'; break;
+	case 'A'...'Z': from = tolower(prev) - 'a'; break;
+	case '0'...'9': from = 26; break;
+	default:        from = 27; break;
+	}
+
+	/* a zero <curr> must land in the "end" slot (29), not the begin one */
+	switch (curr) {
+	case 0:         to = 29; break; // end
+	case 'a'...'z': to = curr - 'a'; break;
+	case 'A'...'Z': to = tolower(curr) - 'a'; break;
+	case '0'...'9': to = 26; break;
+	default:        to = 27; break;
+	}
+
+	fp[32 * from + to]++;
+}
#endif /* _HAPROXY_TOOLS_H */
diff --git a/src/tools.c b/src/tools.c
index 0fd3ede..1255e74 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -5369,18 +5369,17 @@
return pos - shift;
}
-/* Initialize array <fp> with the fingerprint of word <word> by counting the
+/* Update array <fp> with the fingerprint of word <word> by counting the
* transitions between characters. <fp> is a 1024-entries array indexed as
* 32*from+to. Positions for 'from' and 'to' are:
* 0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
*/
-void make_word_fingerprint(uint8_t *fp, const char *word)
+void update_word_fingerprint(uint8_t *fp, const char *word)
{
const char *p;
int from, to;
int c;
- memset(fp, 0, 1024);
from = 28; // begin
for (p = word; *p; p++) {
c = tolower(*p);
@@ -5397,6 +5396,17 @@
fp[32 * from + to]++;
}
+/* Build the fingerprint of word <word> into array <fp> from scratch: <fp> is
+ * first cleared, then the character transitions of <word> are counted into it
+ * by update_word_fingerprint(). <fp> is a 1024-entries array indexed as
+ * 32*from+to. Positions for 'from' and 'to' are:
+ *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ */
+void make_word_fingerprint(uint8_t *fp, const char *word)
+{
+	/* 32 'from' rows of 32 'to' one-byte counters */
+	memset(fp, 0, 32 * 32 * sizeof(*fp));
+	update_word_fingerprint(fp, word);
+}
+
/* Return the distance between two word fingerprints created by function
* make_word_fingerprint(). It's a positive integer calculated as the sum of
* the squares of the differences between each location.