MINOR: tools: add simple word fingerprinting to find similar-looking words

This introduces two functions, one which creates a fingerprint of a word,
and one which computes a distance between two word fingerprints. The
fingerprint is made by counting the transitions between one character and
another one. Here we consider the 26 alphabetic letters regardless of
their case, then any digit as a digit, and anything else as "other". We
also consider the first and last locations as transitions from begin to
first char, and last char to end. The distance is simply the sum of the
squares of the differences between two fingerprints. This way, doubling or
missing a letter has the same cost, however some repeated transitions
such as "e"->"r" like in "server" are very unlikely to match against
situations where they do not exist. This is a naive approach but it seems
to work sufficiently well for now. It may be refined in the future if
needed.
diff --git a/src/tools.c b/src/tools.c
index d3ee426..0fd3ede 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -5369,6 +5369,49 @@
 	return pos - shift;
 }
 
+/* Initialize array <fp> with the fingerprint of word <word> by counting the
+ * transitions between consecutive characters, including the one from "begin"
+ * to the first character and the one from the last character to "end". <fp>
+ * is a 1024-entries array indexed as 32*from+to. Positions are:
+ *   0..25=ASCII letter (any case), 26=digit, 27=other, 28=begin, 29=end,
+ * others unused. Counters saturate at 255 so long words cannot wrap to 0.
+ */
+void make_word_fingerprint(uint8_t *fp, const char *word)
+{
+	int from = 28; // begin
+	int to;
+
+	memset(fp, 0, 1024);
+	for (; *word; word++) {
+		/* bytes >= 0x80 must map to "other", never a negative value */
+		unsigned char c = (unsigned char)*word;
+
+		to = (c >= 'a' && c <= 'z') ? c - 'a' :
+		     (c >= 'A' && c <= 'Z') ? c - 'A' :
+		     (c >= '0' && c <= '9') ? 26 : 27;
+		if (fp[32 * from + to] < UINT8_MAX)
+			fp[32 * from + to]++;
+		from = to;
+	}
+	/* the closing transition uses slot 29 ("end"), as documented above */
+	fp[32 * from + 29]++;
+}
+
+/* Compute how far apart two fingerprints built by make_word_fingerprint() are.
+ * Both <fp1> and <fp2> are read as 1024-entry arrays; the result is the sum over
+ * all entries of the squared difference, hence zero for identical fingerprints.
+ */
+int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2)
+{
+	int idx = 1024, total = 0;
+
+	while (idx--) {
+		int delta = (int)fp1[idx] - (int)fp2[idx];
+		total += delta * delta;
+	}
+	return total;
+}
+
 static int init_tools_per_thread()
 {
 	/* Let's make each thread start from a different position */