[MINOR] tools: add a fast div64_32 function

We'll need to divide 64 bits by 32 bits with new frequency counters.
GCC does not know when it can safely do that, but the way we build
our operations lets us be sure. So let's provide an optimised version
for that purpose.
diff --git a/include/common/standard.h b/include/common/standard.h
index 2366f9e..fcfb52f 100644
--- a/include/common/standard.h
+++ b/include/common/standard.h
@@ -364,6 +364,23 @@
 	return ((unsigned long long)a * b) >> 32;
 }
 
+/* Fast 64/32 division which gcc cannot emit by itself. Only use it when <o2>
+ * is non-zero and the quotient fits in 32 bits, otherwise "divl" faults on
+ * i386. edx is an asm output there because divl stores the remainder in it.
+ */
+static inline unsigned int div64_32(unsigned long long o1, unsigned int o2)
+{
+	unsigned int result;
+#ifdef __i386__
+	unsigned int rem; /* unused: only tells gcc that divl overwrites edx */
+	asm("divl %4" : "=a" (result), "=d" (rem)
+	    : "0" ((unsigned int)o1), "1" ((unsigned int)(o1 >> 32)), "rm" (o2));
+#else
+	result = o1 / o2;
+#endif
+	return result;
+}
+
 /* copies at most <n> characters from <src> and always terminates with '\0' */
 char *my_strndup(const char *src, int n);