crc32: Add crc32 implementation using __builtin_aarch64_crc32b

ARMv8.0 has an optional crc32 instruction for crc32 calculation. The
instruction is mandatory from ARMv8.1 onward. The crc32 calculation is
faster using the dedicated instruction, e.g. a 1.4 GHz iMX8MN gives:

  => time crc32 0x50000000 0x2000000
  time: 0.126 seconds # crc32 instruction
  time: 0.213 seconds # software crc32

Add an implementation using the compiler builtin wrapper for the crc32
instruction and enable it by default, since we don't support any
platforms which do not implement this instruction.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Simon Glass <sjg@chromium.org>
[trini: Make crc32_table guarded by CONFIG_ARM64_CRC32]
Signed-off-by: Tom Rini <trini@konsulko.com>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f0fd57f..95102d3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -9,6 +9,16 @@
 	select PHYS_64BIT
 	select SYS_CACHE_SHIFT_6
 
+config ARM64_CRC32
+	bool "Enable support for CRC32 instruction"
+	depends on ARM64
+	default y
+	help
+	  ARMv8 implements a dedicated crc32 instruction for crc32 calculation.
+	  This is faster than software crc32 calculation. The instruction is
+	  optional on ARMv8.0 and may be absent on some cores, but is always
+	  present on ARMv8.1 and newer.
+
 config POSITION_INDEPENDENT
 	bool "Generate position-independent pre-relocation code"
 	depends on ARM64 || CPU_V7A