MINOR: tools: improve word fingerprinting by counting presence

The distance between two words can be high due to a sub-word being missing,
and in this case other totally unrelated words may end up being proposed
because their average score looks lower thanks to being shorter. Here we're
introducing the notion of presence of each character so that word sequences
that contain existing sub-words are favored against the shorter ones having
nothing in common. In addition we do not distinguish begin/end from a
regular delimiter anymore, as that distinction made it harder to spot
inverted words.
diff --git a/src/tools.c b/src/tools.c
index 1255e74..ffd167a 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -5372,7 +5372,8 @@
 /* Update array <fp> with the fingerprint of word <word> by counting the
  * transitions between characters. <fp> is a 1024-entries array indexed as
  * 32*from+to. Positions for 'from' and 'to' are:
- *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ *   1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark the character's presence. Others unused.
  */
 void update_word_fingerprint(uint8_t *fp, const char *word)
 {
@@ -5384,11 +5385,12 @@
 	for (p = word; *p; p++) {
 		c = tolower(*p);
 		switch(c) {
-		case 'a'...'z': to = c - 'a'; break;
-		case 'A'...'Z': to = tolower(c) - 'a'; break;
-		case '0'...'9': to = 26; break;
-		default: to = 27; break;
+		case 'a'...'z': to = c - 'a' + 1; break;
+		case 'A'...'Z': to = tolower(c) - 'a' + 1; break;
+		case '0'...'9': to = 27; break;
+		default:        to = 28; break;
 		}
+		fp[to] = 1;
 		fp[32 * from + to]++;
 		from = to;
 	}