[PATCH] genksyms: Support arm64 CRC32 hardware acceleration

Wentao Guan posted 1 patch 2 weeks ago
There is a newer version of this series
scripts/genksyms/genksyms.c | 41 +++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
[PATCH] genksyms: Support arm64 CRC32 hardware acceleration
Posted by Wentao Guan 2 weeks ago
Use the hardware 'crc32b' instruction in genksyms when the CPU supports it;
it shows a 2x speedup over the table-based crctab32 approach.

Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
---
 scripts/genksyms/genksyms.c | 41 +++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 83e48670c2fcf..8a56782195593 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -116,13 +116,52 @@ static const uint32_t crctab32[] = {
 	0x2d02ef8dU
 };
 
+/*
+ * Architecture-specific CRC32 hardware acceleration.
+ */
+static int crc32_hw_available;	/* nonzero selects crc32_hw_byte(); set by crc32_check_hw() */
+
+#ifdef __aarch64__
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+static void crc32_check_hw(void)
+{
+	crc32_hw_available = (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;	/* 1 iff AT_HWCAP has HWCAP_CRC32 set */
+}
+
+static inline uint32_t crc32_hw_byte(uint8_t c, uint32_t crc)
+{
+	asm volatile(".arch_extension crc\n\t"	/* enable the CRC extension for the assembler */
+				"crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(c));	/* "+r": crc is read and updated in place with byte c */
+	return crc;	/* the updated CRC value */
+}
+
+#else
+static void crc32_check_hw(void)
+{
+	crc32_hw_available = 0;	/* built without __aarch64__: only the crctab32 path exists */
+}
+#endif
+
 static uint32_t partial_crc32_one(uint8_t c, uint32_t crc)
 {
+#if defined(__aarch64__)
+	if (__builtin_expect(crc32_hw_available, 0))	/* NOTE(review): hint marks the HW path as unlikely - confirm intent */
+		return crc32_hw_byte(c, crc);	/* one byte via the crc32b instruction */
+#endif
 	return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
 }
 
 static uint32_t partial_crc32(const char *s, uint32_t crc)
 {
+#if defined(__aarch64__)
+	if (__builtin_expect(crc32_hw_available, 0)) {
+		while (*s)
+			crc = crc32_hw_byte(*s++, crc);
+		return crc;
+	}
+#endif
 	while (*s)
 		crc = partial_crc32_one(*s++, crc);
 	return crc;
@@ -740,6 +779,8 @@ int main(int argc, char **argv)
 	FILE *dumpfile = NULL, *ref_file = NULL;
 	int o;
 
+	crc32_check_hw();
+
 	struct option long_opts[] = {
 		{"debug", 0, 0, 'd'},
 		{"warnings", 0, 0, 'w'},
-- 
2.30.2