Add a new helper, iaa_submit_desc_movdir64b(), that directly calls
movdir64b. The core iaa_crypto routines that submit compress and
decompress jobs now invoke iaa_submit_desc_movdir64b() in non-irq
driver modes, instead of idxd_submit_desc(). idxd_submit_desc() is
now called only in irq mode.
This improves latency for the most commonly used iaa_crypto mode in
zswap (i.e., async non-irq) by eliminating redundant computation
that would otherwise be incurred in idxd_submit_desc().
For a single-threaded madvise-based workload with the Silesia.tar
dataset, these are the before/after batch compression latencies for a
compress batch of 8 pages:
 ==================================
              p50 (ns)     p99 (ns)
 ==================================
 before          5,568        6,056
 after           5,472        5,848
 Change            -96         -208
 ==================================
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
---
drivers/crypto/intel/iaa/iaa_crypto_main.c | 30 ++++++++++++++--------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 697e98785335..dfc67109e81e 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -1788,6 +1788,24 @@ iaa_setup_decompress_hw_desc(struct idxd_desc *idxd_desc,
return desc;
}
+/*
+ * Write @desc->hw to @wq's portal with iosubmit_cmds512(); returns no status.
+ * Call this only for non-irq, non-enqcmds job submissions.
+ */
+static __always_inline void iaa_submit_desc_movdir64b(struct idxd_wq *wq,
+ struct idxd_desc *desc)
+{
+ void __iomem *portal = idxd_wq_portal_addr(wq);
+
+ /*
+ * wmb() flushes writes to coherent DMA data before the portal write
+ * possibly triggers a DMA read; needed even on UP as the peer is a device.
+ */
+ wmb();
+
+ iosubmit_cmds512(portal, desc->hw, 1);
+}
+
static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
struct idxd_wq *wq,
dma_addr_t src_addr, unsigned int slen,
@@ -1826,11 +1844,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
ctx->mode, iaa_device->compression_modes[ctx->mode]);
if (likely(!ctx->use_irq)) {
- ret = idxd_submit_desc(wq, idxd_desc);
- if (ret) {
- dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
- goto out;
- }
+ iaa_submit_desc_movdir64b(wq, idxd_desc);
/* Update stats */
update_total_comp_calls();
@@ -1918,11 +1932,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
desc = iaa_setup_decompress_hw_desc(idxd_desc, src_addr, slen, dst_addr, *dlen);
if (likely(!ctx->use_irq)) {
- ret = idxd_submit_desc(wq, idxd_desc);
- if (ret) {
- dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
- goto fallback_software_decomp;
- }
+ iaa_submit_desc_movdir64b(wq, idxd_desc);
/* Update stats */
update_total_decomp_calls();
--
2.27.0