MINOR: tools: implement my_flsl()

We already have my_ffsl() to find the lowest bit set in a word, and
this patch implements the search for the highest bit set in a word.
On x86_64 it uses the bsr instruction, and on other architectures it
falls back to a portable binary search over the bits of the word.
diff --git a/include/common/standard.h b/include/common/standard.h
index 87d5f88..7536a64 100644
--- a/include/common/standard.h
+++ b/include/common/standard.h
@@ -808,7 +808,7 @@
 }
 
 /* Simple ffs implementation. It returns the position of the lowest bit set to
- * one. It is illegal to call it with a==0 (undefined result).
+ * one, starting at 1. It is illegal to call it with a==0 (undefined result).
  */
 static inline unsigned int my_ffsl(unsigned long a)
 {
@@ -851,6 +851,50 @@
 	return cnt;
 }
 
+/* Simple fls implementation. It returns the position of the highest bit set to
+ * one, starting at 1. It is illegal to call it with a==0 (undefined result).
+ */
+static inline unsigned int my_flsl(unsigned long a)
+{
+	unsigned long cnt; /* 1-based position of the highest set bit, built up below */
+
+#if defined(__x86_64__) /* single-instruction path */
+	__asm__("bsr %1,%0\n" : "=r" (cnt) : "rm" (a)); /* cnt = index of the highest set bit, counted from 0 */
+	cnt++; /* convert to the documented 1-based position */
+#else /* portable fallback: binary search, halving the examined width at each step */
+
+	cnt = 1; /* a is required to be non-zero, so the result is at least 1 */
+#if LONG_MAX > 0x7FFFFFFFUL /* 64bits */
+	if (a & 0xFFFFFFFF00000000UL) { /* something set in bits 32..63: continue in the upper half */
+		a >>= 32;
+		cnt += 32;
+	}
+#endif /* LONG_MAX > 0x7FFFFFFFUL */
+	if (a & 0XFFFF0000U) { /* something set in bits 16..31 of what remains */
+		a >>= 16;
+		cnt += 16;
+	}
+	if (a & 0XFF00) { /* bits 8..15 */
+		a >>= 8;
+		cnt += 8;
+	}
+	if (a & 0xf0) { /* bits 4..7 */
+		a >>= 4;
+		cnt += 4;
+	}
+	if (a & 0xc) { /* bits 2..3 */
+		a >>= 2;
+		cnt += 2;
+	}
+	if (a & 0x2) { /* bit 1 */
+		a >>= 1; /* a is not read again after this; kept for symmetry with the steps above */
+		cnt += 1;
+	}
+#endif /* x86_64 */
+
+	return cnt; /* implicitly converted from unsigned long to unsigned int */
+}
+
 /* Build a word with the <bits> lower bits set (reverse of my_popcountl) */
 static inline unsigned long nbits(int bits)
 {