From nobody Wed May 1 06:35:45 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=kernel.org ARC-Seal: i=1; a=rsa-sha256; t=1685622876; cv=none; d=zohomail.com; s=zohoarc; b=YmM+yGTmqGa/zRzkcdjDfwgN6XIds46jPXhi29ZHeafSGSRFK0VxkEas7tQnXoc9XeigLiQqo21C9A/i35kObO2497oq7R7MaKUIltN2N7Vc4G6AlNAPbMAsv+6BmD16TmKRfeichp7DWbVtcQEZ2VOAlIPOHpFZTxsNwNzkXBA= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1685622876; h=Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=gs3lJZ5X0nS1wEoi46SJBIt6fEJ+mVI5qkhB2nHvBOI=; b=PPT32ZLTGRs73laCI0oUi3d5G/d32YFJErhwsn7xCiVqZzb0o/b5fpZcfnJw9NLTwKCDg/U85LD+nBu/RLYszuBHaSENtQ2PK1xG2O2YKCoEFx72xuQFD4PF/f0haDClAXQ0V8KhLhDYgaaZoxqEZf1mO46k03B22BGVTO6HcPI= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1685622876001872.3705861383435; Thu, 1 Jun 2023 05:34:36 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1q4hUu-00062q-NG; Thu, 01 Jun 2023 08:33:44 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1q4hUs-00062Q-Vh; Thu, 01 Jun 2023 08:33:43 -0400 Received: from dfw.source.kernel.org ([2604:1380:4641:c500::1]) by eggs.gnu.org with esmtps 
(TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1q4hUr-0002aZ-C5; Thu, 01 Jun 2023 08:33:42 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id C445F608D5; Thu, 1 Jun 2023 12:33:39 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id A3353C4339C; Thu, 1 Jun 2023 12:33:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685622819; bh=ybhCCaRaMtEWM/hqlDKZlEwr5UC1ZUvXv1DqWaYk9MA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LdBq8ScUOELwb9t043Oo96oEIYYaBTlNLZzhG4nhfAVcb52kUYZIjTynJSAd9o46r 6HzLJI6SwsVNM7VxuQZZNWz6JMyARKEObO5CaDx5iUcoiBkOo7E9fp9NuuhvigtC0t ecdDvBCx0HGuvqwcgkXbotefMAXbpj0iQrXZfe+pXOW3uRhxQONBbClWxnLMZBrvwF 7+WQLEGPy4L9LuTHEtLoArrb+fcxHvrh4Q7WBMllFQ1OasO7IBa4gV4g+OwJ1pcr+z YhHz4hyxbOxhA73ZK6QvMuwdS3Mmmpez9Fy6Xqb/l7uOLFfQZZ+CLb4a+0TplRDXcl hm0cbzHVqlgxA== From: Ard Biesheuvel To: qemu-arm@nongnu.org Cc: qemu-devel@nongnu.org, Ard Biesheuvel , Peter Maydell , =?UTF-8?q?Alex=20Benn=C3=A9e?= , Richard Henderson , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Subject: [PATCH 1/2] target/arm: Use x86 intrinsics to implement PMULL.P64 Date: Thu, 1 Jun 2023 14:33:31 +0200 Message-Id: <20230601123332.3297404-2-ardb@kernel.org> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20230601123332.3297404-1-ardb@kernel.org> References: <20230601123332.3297404-1-ardb@kernel.org> MIME-Version: 1.0 X-Developer-Signature: v=1; a=openpgp-sha256; l=2850; i=ardb@kernel.org; h=from:subject; bh=ybhCCaRaMtEWM/hqlDKZlEwr5UC1ZUvXv1DqWaYk9MA=; b=owGbwMvMwCFmkMcZplerG8N4Wi2JIaViglixxrJ5/yNtmaMur5FTufNlysLajHyX/+emhLWl6 ig9s3/aUcrCIMbBICumyCIw+++7nacnStU6z5KFmcPKBDKEgYtTACZSEcrIsHex0Z1na8PK9BYe VgkSTCljm3zOdU/V3KMzHQPs/vCuWsPwVz43oJbD7KbbT4PySzs3nt8qfMdk/cp3zru+TLr8W+z rVnYA X-Developer-Key: i=ardb@kernel.org; a=openpgp; 
fpr=F43D03328115A198C90016883D200E9CA6329909 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=2604:1380:4641:c500::1; envelope-from=ardb@kernel.org; helo=dfw.source.kernel.org X-Spam_score_int: -45 X-Spam_score: -4.6 X-Spam_bar: ---- X-Spam_report: (-4.6 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.166, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_MED=-2.3, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer=patchew.org@nongnu.org X-ZohoMail-DKIM: pass (identity @kernel.org) X-ZM-MESSAGEID: 1685622877875100001 Content-Type: text/plain; charset="utf-8" Signed-off-by: Ard Biesheuvel --- host/include/i386/host/cpuinfo.h | 1 + target/arm/tcg/vec_helper.c | 26 +++++++++++++++++++- util/cpuinfo-i386.c | 1 + 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpui= nfo.h index 073d0a426f31487d..cf4ced844760d28f 100644 --- a/host/include/i386/host/cpuinfo.h +++ b/host/include/i386/host/cpuinfo.h @@ -27,6 +27,7 @@ #define CPUINFO_ATOMIC_VMOVDQA (1u << 16) #define CPUINFO_ATOMIC_VMOVDQU (1u << 17) #define CPUINFO_AES (1u << 18) +#define CPUINFO_PMULL (1u << 19) =20 /* Initialized with a constructor. 
*/ extern unsigned cpuinfo; diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index f59d3b26eacf08f8..fb422627588439b3 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -25,6 +25,14 @@ #include "qemu/int128.h" #include "vec_internal.h" =20 +#ifdef __x86_64__ +#include "host/cpuinfo.h" +#include +#define TARGET_PMULL __attribute__((__target__("pclmul"))) +#else +#define TARGET_PMULL +#endif + /* * Data for expanding active predicate bits to bytes, for byte elements. * @@ -2010,12 +2018,28 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *= vm, uint32_t desc) * Because of the lanes are not accessed in strict columns, * this probably cannot be turned into a generic helper. */ -void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc) +void TARGET_PMULL HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint3= 2_t desc) { intptr_t i, j, opr_sz =3D simd_oprsz(desc); intptr_t hi =3D simd_data(desc); uint64_t *d =3D vd, *n =3D vn, *m =3D vm; =20 +#ifdef __x86_64__ + if (cpuinfo & CPUINFO_PMULL) { + switch (hi) { + case 0: + *(__m128i *)vd =3D _mm_clmulepi64_si128(*(__m128i *)vm, *(__m128i *)vn, = 0x0); + break; + case 1: + *(__m128i *)vd =3D _mm_clmulepi64_si128(*(__m128i *)vm, *(__m128i *)vn, = 0x11); + break; + default: + g_assert_not_reached(); + } + return; + } +#endif + for (i =3D 0; i < opr_sz / 8; i +=3D 2) { uint64_t nn =3D n[i + hi]; uint64_t mm =3D m[i + hi]; diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 3043f066c0182dc8..8930e13451201a64 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -40,6 +40,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) info |=3D (c & bit_MOVBE ? CPUINFO_MOVBE : 0); info |=3D (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |=3D (c & bit_AES ? CPUINFO_AES : 0); + info |=3D (c & bit_PCLMULQDQ ? CPUINFO_PMULL : 0); =20 /* For AVX features, we must check available and usable. 
*/ if ((c & bit_AVX) && (c & bit_OSXSAVE)) { --=20 2.39.2 From nobody Wed May 1 06:35:45 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=kernel.org ARC-Seal: i=1; a=rsa-sha256; t=1685622877; cv=none; d=zohomail.com; s=zohoarc; b=ACvT613JBrkocfUnQ6N+0B16G7UcfA08tlIrc7AhoDgUDXX4/57X1PTA+vEg1/YXGlSJXTJ+3PvvAAA9NmTk04Tde6JjKW4ZODKkDhBa3N2xP4icRsbMjzLXlR0c/LYyKYWoiyXgReTZrFcJW/FnPF+3BfCuvqbDdWO4Dmwx0b4= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1685622877; h=Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=6vpx5TYtcjQOEO25BXt+lzmn41Cv9OHTZKvB4B82/c4=; b=AKV/Kq6VQ/nqXl4VJFWAoCLwS/tysNjoMU6DaxeWHxyFVg2J2eco8u2w0lP2J/zfCmSyRQCVcRpgoejd6iRbO5Baj620waJJzszxCwDuc6Wydl87/mEn0r/6tbR6yBCM0G87cJ+elHFQRUsVtNwkXbM8BH9CCRjwHG/o9idOKHI= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1685622877790511.50730489518537; Thu, 1 Jun 2023 05:34:37 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1q4hUw-00064T-LV; Thu, 01 Jun 2023 08:33:46 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1q4hUu-00063T-TJ; Thu, 01 Jun 2023 08:33:44 -0400 Received: from dfw.source.kernel.org 
([139.178.84.217]) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1q4hUt-0002al-8t; Thu, 01 Jun 2023 08:33:44 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id C3E1064405; Thu, 1 Jun 2023 12:33:41 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id A2BDEC433D2; Thu, 1 Jun 2023 12:33:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685622821; bh=Altb9gx/izM5ctcWAjcLXu6Yyml5zlbxuXilVs0UmFo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GZOMIycMpsrQMgWQub7f7W5KecPa0htilzegtPRZVV7dX86TBWYfO8GC4k+7CVL1K fytcJcC2MRz42POdqP0eYPWYGVIiLyqMPOMR+qHc29odiSWy3gdHQn4oluwz/rvOIn kLeMxPxh4FbuGMYQvsYtoQUejIl+tE8MKuP0JruPTqt43HGA3ICuh9BZ4MNSOrQkeD CwBHBLmvmljrBQhx4bOxVdZQqcwANUtCXluwIThgkzG1BefRDcYgTLZL+zZ/BrIzbh GkU/weYMmjo8zO+dD80E9lFfGj60RALZjYCzRXj1InfI7qsfxCUhbspWhkJPhJ+GIe rW2UdIRZ4dVAA== From: Ard Biesheuvel To: qemu-arm@nongnu.org Cc: qemu-devel@nongnu.org, Ard Biesheuvel , Peter Maydell , =?UTF-8?q?Alex=20Benn=C3=A9e?= , Richard Henderson , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Subject: [PATCH 2/2] target/i386: Implement PCLMULQDQ using AArch64 PMULL instructions Date: Thu, 1 Jun 2023 14:33:32 +0200 Message-Id: <20230601123332.3297404-3-ardb@kernel.org> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20230601123332.3297404-1-ardb@kernel.org> References: <20230601123332.3297404-1-ardb@kernel.org> MIME-Version: 1.0 X-Developer-Signature: v=1; a=openpgp-sha256; l=3517; i=ardb@kernel.org; h=from:subject; bh=Altb9gx/izM5ctcWAjcLXu6Yyml5zlbxuXilVs0UmFo=; b=owGbwMvMwCFmkMcZplerG8N4Wi2JIaVigvj0tBJulgUChyb9Smtwzz1c0CnMWP7nX2Hjpv+rP uZzKN/pKGVhEONgkBVTZBGY/ffdztMTpWqdZ8nCzGFlAhnCwMUpABNZUM7wh0M5+PSp2EWLTqs+ 
e/qF3Zs/9/aDTY9adUT3S4oKRE340cbwz2TO9Ak2e9/07FBraGhxmB+j3sbC01HyenqewiWLLsb v7AA= X-Developer-Key: i=ardb@kernel.org; a=openpgp; fpr=F43D03328115A198C90016883D200E9CA6329909 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=139.178.84.217; envelope-from=ardb@kernel.org; helo=dfw.source.kernel.org X-Spam_score_int: -72 X-Spam_score: -7.3 X-Spam_bar: ------- X-Spam_report: (-7.3 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.166, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_HI=-5, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer=patchew.org@nongnu.org X-ZohoMail-DKIM: pass (identity @kernel.org) X-ZM-MESSAGEID: 1685622879844100005 Content-Type: text/plain; charset="utf-8" If the host supports them, use the AArch64 PMULL{2}.P64 instructions to implement PCLMULQDQ instead of emulating it in C code. This is used in the implementation of GCM, which is widely used in IPsec VPN and HTTPS. Somewhat surprisingly, on my ThunderX2, enabling this on top of the AES acceleration I sent out earlier yields a substantial speedup. 
(1420 is a typical IPsec block size; in HTTPS, GCM operates on much larger block sizes, but the kernel mode benchmarks are not the best place to measure its performance in this mode.) tcrypt: testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption No acceleration: tcrypt: test 5 (160 bit key, 1420 byte blocks): 10046 operations in 1 seconds (14265320 bytes) AES acceleration: tcrypt: test 5 (160 bit key, 1420 byte blocks): 13970 operations in 1 seconds (19837400 bytes) AES + PMULL acceleration: tcrypt: test 5 (160 bit key, 1420 byte blocks): 24372 operations in 1 seconds (34608240 bytes) Signed-off-by: Ard Biesheuvel --- host/include/aarch64/host/cpuinfo.h | 1 + target/i386/ops_sse.h | 24 ++++++++++++++++++++ util/cpuinfo-aarch64.c | 1 + 3 files changed, 26 insertions(+) diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/hos= t/cpuinfo.h index 05feeb4f4369fc19..da268dce1390cac0 100644 --- a/host/include/aarch64/host/cpuinfo.h +++ b/host/include/aarch64/host/cpuinfo.h @@ -10,6 +10,7 @@ #define CPUINFO_LSE (1u << 1) #define CPUINFO_LSE2 (1u << 2) #define CPUINFO_AES (1u << 3) +#define CPUINFO_PMULL (1u << 4) =20 /* Initialized with a constructor. 
*/ extern unsigned cpuinfo; diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index db79132778efd211..d7e7bd8b733122a8 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -2157,6 +2157,30 @@ void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env= , Reg *d, Reg *v, Reg *s, uint64_t a, b; int i; =20 +#ifdef __aarch64__ + if (cpuinfo & CPUINFO_PMULL) { + aes_vec_t vv =3D *(aes_vec_t *)v, vs =3D *(aes_vec_t *)s; + aes_vec_t *vd =3D (aes_vec_t *)d; + + switch (ctrl & 0x11) { + case 0x1: + asm("ext %0.16b, %0.16b, %0.16b, #8":"+w"(vv)); + /* fallthrough */ + case 0x0: + asm(".arch_extension aes\n" + "pmull %0.1q, %1.1d, %2.1d":"=3Dw"(*vd):"w"(vv),"w"(vs)); + break; + case 0x10: + asm("ext %0.16b, %0.16b, %0.16b, #8":"+w"(vv)); + /* fallthrough */ + case 0x11: + asm(".arch_extension aes\n" + "pmull2 %0.1q, %1.2d, %2.2d":"=3Dw"(*vd):"w"(vv),"w"(vs)); + } + return; + } +#endif + for (i =3D 0; i < 1 << SHIFT; i +=3D 2) { a =3D v->Q(((ctrl & 1) !=3D 0) + i); b =3D s->Q(((ctrl & 16) !=3D 0) + i); diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c index 769cdfeb2fc32d5e..95ec1f4adfc829b9 100644 --- a/util/cpuinfo-aarch64.c +++ b/util/cpuinfo-aarch64.c @@ -57,6 +57,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) info |=3D (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0); info |=3D (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0); info |=3D (hwcap & HWCAP_AES ? CPUINFO_AES : 0); + info |=3D (hwcap & HWCAP_PMULL ? CPUINFO_PMULL : 0); #endif #ifdef CONFIG_DARWIN info |=3D sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE; --=20 2.39.2