Use the hardware 'crc32b' instruction to compute CRCs in genksyms when the
host supports it; it is about 2x faster than the crctab32 table lookup.
Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
---
scripts/genksyms/genksyms.c | 41 +++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 83e48670c2fcf..8a56782195593 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -116,13 +116,52 @@ static const uint32_t crctab32[] = {
0x2d02ef8dU
};
+/*
+ * Architecture-specific CRC32 hardware acceleration.
+ *
+ * crc32_hw_available is written by crc32_check_hw() and is non-zero only
+ * when the host reports the AArch64 CRC32 capability. Callers test it
+ * before using crc32_hw_byte().
+ */
+static int crc32_hw_available;
+
+/*
+ * <sys/auxv.h>, <asm/hwcap.h>, getauxval() and HWCAP_CRC32 are Linux
+ * userspace interfaces, so the probe is only built on AArch64 Linux hosts.
+ * Every other host keeps crc32_hw_available at 0 and uses crctab32.
+ */
+#if defined(__aarch64__) && defined(__linux__)
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+/* Record whether the auxiliary vector advertises the CRC32 capability. */
+static void crc32_check_hw(void)
+{
+	crc32_hw_available = (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
+}
+
+/*
+ * Fold the byte c into crc with the crc32b instruction and return the
+ * updated value. ".arch_extension crc" makes the assembler accept the
+ * instruction regardless of the -march the tool is compiled with, which
+ * is why callers must check crc32_hw_available first.
+ */
+static inline uint32_t crc32_hw_byte(uint8_t c, uint32_t crc)
+{
+	asm volatile(".arch_extension crc\n\t"
+		     "crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(c));
+	return crc;
+}
+
+#else
+/* No capability probe on this host: always take the crctab32 path. */
+static void crc32_check_hw(void)
+{
+	crc32_hw_available = 0;
+}
+
+#ifdef __aarch64__
+/*
+ * Code guarded only by __aarch64__ still references crc32_hw_byte().
+ * Provide a table-based stand-in so AArch64 non-Linux hosts keep
+ * building; it is not reached at run time because crc32_hw_available
+ * stays 0 here.
+ */
+static inline uint32_t crc32_hw_byte(uint8_t c, uint32_t crc)
+{
+	return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
+}
+#endif
+#endif
+
+/*
+ * Fold one byte c into the running CRC value crc and return the result.
+ * On AArch64 the crc32b path is used when crc32_hw_available is set;
+ * otherwise the crctab32 lookup below is used.
+ */
static uint32_t partial_crc32_one(uint8_t c, uint32_t crc)
{
+#if defined(__aarch64__)
+	/* The hardware path is the one being accelerated: hint it as likely. */
+	if (__builtin_expect(crc32_hw_available, 1))
+		return crc32_hw_byte(c, crc);
+#endif
return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
}
static uint32_t partial_crc32(const char *s, uint32_t crc)
{
+#if defined(__aarch64__)
+ if (__builtin_expect(crc32_hw_available, 0)) {
+ while (*s)
+ crc = crc32_hw_byte(*s++, crc);
+ return crc;
+ }
+#endif
while (*s)
crc = partial_crc32_one(*s++, crc);
return crc;
@@ -740,6 +779,8 @@ int main(int argc, char **argv)
FILE *dumpfile = NULL, *ref_file = NULL;
int o;
+ crc32_check_hw();
+
struct option long_opts[] = {
{"debug", 0, 0, 'd'},
{"warnings", 0, 0, 'w'},
--
2.30.2