From nobody Sun Nov 24 18:37:38 2024 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B745719CC2D for ; Mon, 4 Nov 2024 01:41:41 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.19 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730684504; cv=none; b=sbwrYkQWSA/7fvpqNObuzouBQjcsq4Y7mKtKUiNvOiN+1ka0qeGio/gJCzD+2s4GqnZp4fEWZll3hryQ5vrRE/TsktkPeetX8/TcTfP4HPiqxjDR5y0UeH3Q/bWgaErxg0YMcKyTFJbpLIZlRDmB+UGpk0UEN/xIFQP1p4+BiGY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730684504; c=relaxed/simple; bh=TYOLs2DCehrObqbeopz4BLUsJlVmtUl/qxOpmlQ65Ng=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=h1vb9Ehja86JDoaiPaPEUA9sI41OWBZh49B/+G21mLE+z6kxuHqCAaYaoAiroxmxpE+oghNFTo/U+8UMCTbB+jDndS1QD14KhIluIANOQJLqHX3uVcqu9WzL7dLkave5K4IWvv4iXbVqngQL917hs8N8QwK4HyJA8a+l/AJ8W3Q= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.intel.com; spf=none smtp.mailfrom=linux.intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=Xe+cdd1f; arc=none smtp.client-ip=198.175.65.19 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.intel.com Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=linux.intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="Xe+cdd1f" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1730684502; x=1762220502; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=TYOLs2DCehrObqbeopz4BLUsJlVmtUl/qxOpmlQ65Ng=; 
b=Xe+cdd1fjhs+wwrYx+ekxm6PI9BkNuzwhupL9/9gUaZGYAkLDJjRkqFC uGj6oWoB45kmSCrje2WJqH3mkOZgv722P8ZzeF0J3DhU5VlIhOVYqQp7I LlW28tgENYDrhHVW/efGsz1n0LHvS/BDI835rnroZrnF5Uh8eSflcZcyj wFd8Y1rhK6bOkHmjwmrswhsjY2BniKbXsbJNY+M6/oR/rC+tm/WbNiejN I8hDdNutckVr6dAtwMBLkKZRdEgu6+xPPZpVLwnuLc3+0o+psnTtNaWR8 vkrqFRK0KpBF95h1Bg+38fXtuNzmlMcNLxXaFKzC+DpoLrbR2wCT+arnR A==; X-CSE-ConnectionGUID: YS2Wj2wVQJm79whFedqr2w== X-CSE-MsgGUID: UGyWk/3GSiS14XPV0des3Q== X-IronPort-AV: E=McAfee;i="6700,10204,11222"; a="30221951" X-IronPort-AV: E=Sophos;i="6.11,199,1725346800"; d="scan'208";a="30221951" Received: from fmviesa005.fm.intel.com ([10.60.135.145]) by orvoesa111.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 03 Nov 2024 17:41:41 -0800 X-CSE-ConnectionGUID: LEQtSL2bRTSke0gKusv2Mg== X-CSE-MsgGUID: 9l6K6uC/TvOPakASYUyTDA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.11,256,1725346800"; d="scan'208";a="88020893" Received: from allen-sbox.sh.intel.com ([10.239.159.30]) by fmviesa005.fm.intel.com with ESMTP; 03 Nov 2024 17:41:40 -0800 From: Lu Baolu To: Joerg Roedel Cc: iommu@lists.linux.dev, linux-kernel@vger.kernel.org Subject: [PATCH 14/19] iommu/vt-d: Separate page request queue from SVM Date: Mon, 4 Nov 2024 09:40:34 +0800 Message-ID: <20241104014040.106100-15-baolu.lu@linux.intel.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20241104014040.106100-1-baolu.lu@linux.intel.com> References: <20241104014040.106100-1-baolu.lu@linux.intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Joel Granados IO page faults are no longer dependent on CONFIG_INTEL_IOMMU_SVM. Move all Page Request Queue (PRQ) functions that handle prq events to a new file in drivers/iommu/intel/prq.c. The page_req_dsc struct is now declared in drivers/iommu/intel/prq.c. No functional changes are intended. 
This is a preparation patch to enable the use of IO page faults outside the SVM/PASID use cases. Signed-off-by: Joel Granados Link: https://lore.kernel.org/r/20241015-jag-iopfv8-v4-1-b696ca89ba29@kerne= l.org Signed-off-by: Lu Baolu --- drivers/iommu/intel/Makefile | 2 +- drivers/iommu/intel/iommu.c | 20 +- drivers/iommu/intel/iommu.h | 14 +- drivers/iommu/intel/prq.c | 410 +++++++++++++++++++++++++++++++++++ drivers/iommu/intel/svm.c | 397 --------------------------------- 5 files changed, 424 insertions(+), 419 deletions(-) create mode 100644 drivers/iommu/intel/prq.c diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index c8beb0281559..d3bb0798092d 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) +=3D dmar.o -obj-$(CONFIG_INTEL_IOMMU) +=3D iommu.o pasid.o nested.o cache.o +obj-$(CONFIG_INTEL_IOMMU) +=3D iommu.o pasid.o nested.o cache.o prq.o obj-$(CONFIG_DMAR_TABLE) +=3D trace.o cap_audit.o obj-$(CONFIG_DMAR_PERF) +=3D perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) +=3D debugfs.o diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b7351d81b77d..3a20795b3f8d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1329,12 +1329,10 @@ static void free_dmar_iommu(struct intel_iommu *iom= mu) /* free context mapping */ free_context_table(iommu); =20 -#ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) - intel_svm_finish_prq(iommu); + intel_iommu_finish_prq(iommu); } -#endif } =20 /* @@ -2194,19 +2192,18 @@ static int __init init_dmars(void) =20 iommu_flush_write_buffer(iommu); =20 -#ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { /* * Call dmar_alloc_hwirq() with dmar_global_lock held, * could cause possible lock race condition. 
*/ up_write(&dmar_global_lock); - ret =3D intel_svm_enable_prq(iommu); + ret =3D intel_iommu_enable_prq(iommu); down_write(&dmar_global_lock); if (ret) goto free_iommu; } -#endif + ret =3D dmar_set_interrupt(iommu); if (ret) goto free_iommu; @@ -2619,13 +2616,12 @@ static int intel_iommu_add(struct dmar_drhd_unit *d= maru) intel_iommu_init_qi(iommu); iommu_flush_write_buffer(iommu); =20 -#ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { - ret =3D intel_svm_enable_prq(iommu); + ret =3D intel_iommu_enable_prq(iommu); if (ret) goto disable_iommu; } -#endif + ret =3D dmar_set_interrupt(iommu); if (ret) goto disable_iommu; @@ -4074,7 +4070,7 @@ static void intel_iommu_remove_dev_pasid(struct devic= e *dev, ioasid_t pasid, intel_iommu_debugfs_remove_dev_pasid(dev_pasid); kfree(dev_pasid); intel_pasid_tear_down_entry(iommu, dev, pasid, false); - intel_drain_pasid_prq(dev, pasid); + intel_iommu_drain_pasid_prq(dev, pasid); } =20 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, @@ -4417,9 +4413,7 @@ const struct iommu_ops intel_iommu_ops =3D { .def_domain_type =3D device_def_domain_type, .remove_dev_pasid =3D intel_iommu_remove_dev_pasid, .pgsize_bitmap =3D SZ_4K, -#ifdef CONFIG_INTEL_IOMMU_SVM - .page_response =3D intel_svm_page_response, -#endif + .page_response =3D intel_iommu_page_response, .default_domain_ops =3D &(const struct iommu_domain_ops) { .attach_dev =3D intel_iommu_attach_device, .set_dev_pasid =3D intel_iommu_set_dev_pasid, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 4c6135a2e2f8..b3912633ce25 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -728,12 +728,10 @@ struct intel_iommu { =20 struct iommu_flush flush; #endif -#ifdef CONFIG_INTEL_IOMMU_SVM struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ unsigned long prq_seq_number; struct completion prq_complete; -#endif struct iopf_queue *iopf_queue; unsigned char 
iopfq_name[16]; /* Synchronization between fault report and iommu device release. */ @@ -1274,18 +1272,18 @@ void intel_context_flush_present(struct device_doma= in_info *info, struct context_entry *context, u16 did, bool affect_domains); =20 +int intel_iommu_enable_prq(struct intel_iommu *iommu); +int intel_iommu_finish_prq(struct intel_iommu *iommu); +void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); +void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid); + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); -int intel_svm_enable_prq(struct intel_iommu *iommu); -int intel_svm_finish_prq(struct intel_iommu *iommu); -void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, - struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(struct device *dev, struct mm_struct *mm); -void intel_drain_pasid_prq(struct device *dev, u32 pasid); #else static inline void intel_svm_check(struct intel_iommu *iommu) {} -static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {} static inline struct iommu_domain *intel_svm_domain_alloc(struct device *d= ev, struct mm_struct *mm) { diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c new file mode 100644 index 000000000000..edda5da8ba15 --- /dev/null +++ b/drivers/iommu/intel/prq.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Intel Corporation + * + * Originally split from drivers/iommu/intel/svm.c + */ + +#include +#include + +#include "iommu.h" +#include "pasid.h" +#include "../iommu-pages.h" +#include "trace.h" + +/* Page request queue descriptor */ +struct page_req_dsc { + union { + struct { + u64 type:8; + u64 pasid_present:1; + u64 rsvd:7; + u64 rid:16; + u64 pasid:20; + u64 exe_req:1; + u64 pm_req:1; + u64 rsvd2:10; + }; + u64 qw_0; + }; + union { + struct { + u64 rd_req:1; + u64 wr_req:1; + u64 lpig:1; + u64 prg_index:9; + 
u64 addr:52; + }; + u64 qw_1; + }; + u64 qw_2; + u64 qw_3; +}; + +/** + * intel_iommu_drain_pasid_prq - Drain page requests and responses for a p= asid + * @dev: target device + * @pasid: pasid for draining + * + * Drain all pending page requests and responses related to @pasid in both + * software and hardware. This is supposed to be called after the device + * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB + * and DevTLB have been invalidated. + * + * It waits until all pending page requests for @pasid in the page fault + * queue are completed by the prq handling thread. Then follow the steps + * described in VT-d spec CH7.10 to drain all page requests and page + * responses pending in the hardware. + */ +void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid) +{ + struct device_domain_info *info; + struct dmar_domain *domain; + struct intel_iommu *iommu; + struct qi_desc desc[3]; + struct pci_dev *pdev; + int head, tail; + u16 sid, did; + int qdep; + + info =3D dev_iommu_priv_get(dev); + if (WARN_ON(!info || !dev_is_pci(dev))) + return; + + if (!info->pri_enabled) + return; + + iommu =3D info->iommu; + domain =3D info->domain; + pdev =3D to_pci_dev(dev); + sid =3D PCI_DEVID(info->bus, info->devfn); + did =3D domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID; + + qdep =3D pci_ats_queue_depth(pdev); + + /* + * Check and wait until all pending page requests in the queue are + * handled by the prq handling thread. 
+ */ +prq_retry: + reinit_completion(&iommu->prq_complete); + tail =3D dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head =3D dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head !=3D tail) { + struct page_req_dsc *req; + + req =3D &iommu->prq[head / sizeof(*req)]; + if (!req->pasid_present || req->pasid !=3D pasid) { + head =3D (head + sizeof(*req)) & PRQ_RING_MASK; + continue; + } + + wait_for_completion(&iommu->prq_complete); + goto prq_retry; + } + + iopf_queue_flush_dev(dev); + + /* + * Perform steps described in VT-d spec CH7.10 to drain page + * requests and responses in hardware. + */ + memset(desc, 0, sizeof(desc)); + desc[0].qw0 =3D QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_FENCE | + QI_IWD_TYPE; + desc[1].qw0 =3D QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc[2].qw0 =3D QI_DEV_EIOTLB_PASID(pasid) | + QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | + QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(info->pfsid); +qi_retry: + reinit_completion(&iommu->prq_complete); + qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + wait_for_completion(&iommu->prq_complete); + goto qi_retry; + } +} + +static bool is_canonical_address(u64 addr) +{ + int shift =3D 64 - (__VIRTUAL_MASK_SHIFT + 1); + long saddr =3D (long)addr; + + return (((saddr << shift) >> shift) =3D=3D saddr); +} + +static void handle_bad_prq_event(struct intel_iommu *iommu, + struct page_req_dsc *req, int result) +{ + struct qi_desc desc =3D { }; + + pr_err("%s: Invalid page request: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + + if (!req->lpig) + return; + + desc.qw0 =3D QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID(req->rid) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_RESP_CODE(result) | + QI_PGRP_RESP_TYPE; + desc.qw1 =3D QI_PGRP_IDX(req->prg_index) | + QI_PGRP_LPIG(req->lpig); + + 
qi_submit_sync(iommu, &desc, 1, 0); +} + +static int prq_to_iommu_prot(struct page_req_dsc *req) +{ + int prot =3D 0; + + if (req->rd_req) + prot |=3D IOMMU_FAULT_PERM_READ; + if (req->wr_req) + prot |=3D IOMMU_FAULT_PERM_WRITE; + if (req->exe_req) + prot |=3D IOMMU_FAULT_PERM_EXEC; + if (req->pm_req) + prot |=3D IOMMU_FAULT_PERM_PRIV; + + return prot; +} + +static void intel_prq_report(struct intel_iommu *iommu, struct device *dev, + struct page_req_dsc *desc) +{ + struct iopf_fault event =3D { }; + + /* Fill in event data for device specific processing */ + event.fault.type =3D IOMMU_FAULT_PAGE_REQ; + event.fault.prm.addr =3D (u64)desc->addr << VTD_PAGE_SHIFT; + event.fault.prm.pasid =3D desc->pasid; + event.fault.prm.grpid =3D desc->prg_index; + event.fault.prm.perm =3D prq_to_iommu_prot(desc); + + if (desc->lpig) + event.fault.prm.flags |=3D IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + if (desc->pasid_present) { + event.fault.prm.flags |=3D IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + event.fault.prm.flags |=3D IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; + } + + iommu_report_device_fault(dev, &event); +} + +static irqreturn_t prq_event_thread(int irq, void *d) +{ + struct intel_iommu *iommu =3D d; + struct page_req_dsc *req; + int head, tail, handled; + struct device *dev; + u64 address; + + /* + * Clear PPR bit before reading head/tail registers, to ensure that + * we get a new interrupt if needed. 
+ */ + writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); + + tail =3D dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head =3D dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + handled =3D (head !=3D tail); + while (head !=3D tail) { + req =3D &iommu->prq[head / sizeof(*req)]; + address =3D (u64)req->addr << VTD_PAGE_SHIFT; + + if (unlikely(!req->pasid_present)) { + pr_err("IOMMU: %s: Page request without PASID\n", + iommu->name); +bad_req: + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + goto prq_advance; + } + + if (unlikely(!is_canonical_address(address))) { + pr_err("IOMMU: %s: Address is not canonical\n", + iommu->name); + goto bad_req; + } + + if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { + pr_err("IOMMU: %s: Page request in Privilege Mode\n", + iommu->name); + goto bad_req; + } + + if (unlikely(req->exe_req && req->rd_req)) { + pr_err("IOMMU: %s: Execution request not supported\n", + iommu->name); + goto bad_req; + } + + /* Drop Stop Marker message. No need for a response. */ + if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) + goto prq_advance; + + /* + * If prq is to be handled outside iommu driver via receiver of + * the fault notifiers, we skip the page response here. + */ + mutex_lock(&iommu->iopf_lock); + dev =3D device_rbtree_find(iommu, req->rid); + if (!dev) { + mutex_unlock(&iommu->iopf_lock); + goto bad_req; + } + + intel_prq_report(iommu, dev, req); + trace_prq_report(iommu, dev, req->qw_0, req->qw_1, + req->qw_2, req->qw_3, + iommu->prq_seq_number++); + mutex_unlock(&iommu->iopf_lock); +prq_advance: + head =3D (head + sizeof(*req)) & PRQ_RING_MASK; + } + + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + + /* + * Clear the page request overflow bit and wake up all threads that + * are waiting for the completion of this handling. 
+ */ + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", + iommu->name); + head =3D dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + tail =3D dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + if (head =3D=3D tail) { + iopf_queue_discard_partial(iommu->iopf_queue); + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", + iommu->name); + } + } + + if (!completion_done(&iommu->prq_complete)) + complete(&iommu->prq_complete); + + return IRQ_RETVAL(handled); +} + +int intel_iommu_enable_prq(struct intel_iommu *iommu) +{ + struct iopf_queue *iopfq; + int irq, ret; + + iommu->prq =3D iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER); + if (!iommu->prq) { + pr_warn("IOMMU: %s: Failed to allocate page request queue\n", + iommu->name); + return -ENOMEM; + } + + irq =3D dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->= node, iommu); + if (irq <=3D 0) { + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", + iommu->name); + ret =3D -EINVAL; + goto free_prq; + } + iommu->pr_irq =3D irq; + + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), + "dmar%d-iopfq", iommu->seq_id); + iopfq =3D iopf_queue_alloc(iommu->iopfq_name); + if (!iopfq) { + pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); + ret =3D -ENOMEM; + goto free_hwirq; + } + iommu->iopf_queue =3D iopfq; + + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->s= eq_id); + + ret =3D request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, + iommu->prq_name, iommu); + if (ret) { + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", + iommu->name); + goto free_iopfq; + } + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORD= ER); + + 
init_completion(&iommu->prq_complete); + + return 0; + +free_iopfq: + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue =3D NULL; +free_hwirq: + dmar_free_hwirq(irq); + iommu->pr_irq =3D 0; +free_prq: + iommu_free_pages(iommu->prq, PRQ_ORDER); + iommu->prq =3D NULL; + + return ret; +} + +int intel_iommu_finish_prq(struct intel_iommu *iommu) +{ + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); + + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq =3D 0; + } + + if (iommu->iopf_queue) { + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue =3D NULL; + } + + iommu_free_pages(iommu->prq, PRQ_ORDER); + iommu->prq =3D NULL; + + return 0; +} + +void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg) +{ + struct device_domain_info *info =3D dev_iommu_priv_get(dev); + struct intel_iommu *iommu =3D info->iommu; + u8 bus =3D info->bus, devfn =3D info->devfn; + struct iommu_fault_page_request *prm; + struct qi_desc desc; + bool pasid_present; + bool last_page; + u16 sid; + + prm =3D &evt->fault.prm; + sid =3D PCI_DEVID(bus, devfn); + pasid_present =3D prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + last_page =3D prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + + desc.qw0 =3D QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | + QI_PGRP_PASID_P(pasid_present) | + QI_PGRP_RESP_CODE(msg->code) | + QI_PGRP_RESP_TYPE; + desc.qw1 =3D QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); + desc.qw2 =3D 0; + desc.qw3 =3D 0; + + qi_submit_sync(iommu, &desc, 1, 0); +} diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 078d1e32a24e..3cc43a958b4d 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -25,92 +25,6 @@ #include "../iommu-pages.h" #include "trace.h" =20 -static irqreturn_t prq_event_thread(int irq, void *d); - -int 
intel_svm_enable_prq(struct intel_iommu *iommu) -{ - struct iopf_queue *iopfq; - int irq, ret; - - iommu->prq =3D iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER); - if (!iommu->prq) { - pr_warn("IOMMU: %s: Failed to allocate page request queue\n", - iommu->name); - return -ENOMEM; - } - - irq =3D dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->= node, iommu); - if (irq <=3D 0) { - pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", - iommu->name); - ret =3D -EINVAL; - goto free_prq; - } - iommu->pr_irq =3D irq; - - snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), - "dmar%d-iopfq", iommu->seq_id); - iopfq =3D iopf_queue_alloc(iommu->iopfq_name); - if (!iopfq) { - pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); - ret =3D -ENOMEM; - goto free_hwirq; - } - iommu->iopf_queue =3D iopfq; - - snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->s= eq_id); - - ret =3D request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, - iommu->prq_name, iommu); - if (ret) { - pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", - iommu->name); - goto free_iopfq; - } - dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); - dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); - dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORD= ER); - - init_completion(&iommu->prq_complete); - - return 0; - -free_iopfq: - iopf_queue_free(iommu->iopf_queue); - iommu->iopf_queue =3D NULL; -free_hwirq: - dmar_free_hwirq(irq); - iommu->pr_irq =3D 0; -free_prq: - iommu_free_pages(iommu->prq, PRQ_ORDER); - iommu->prq =3D NULL; - - return ret; -} - -int intel_svm_finish_prq(struct intel_iommu *iommu) -{ - dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); - dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); - dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); - - if (iommu->pr_irq) { - free_irq(iommu->pr_irq, iommu); - dmar_free_hwirq(iommu->pr_irq); - iommu->pr_irq =3D 0; - } - - if 
(iommu->iopf_queue) { - iopf_queue_free(iommu->iopf_queue); - iommu->iopf_queue =3D NULL; - } - - iommu_free_pages(iommu->prq, PRQ_ORDER); - iommu->prq =3D NULL; - - return 0; -} - void intel_svm_check(struct intel_iommu *iommu) { if (!pasid_supported(iommu)) @@ -240,317 +154,6 @@ static int intel_svm_set_dev_pasid(struct iommu_domai= n *domain, return ret; } =20 -/* Page request queue descriptor */ -struct page_req_dsc { - union { - struct { - u64 type:8; - u64 pasid_present:1; - u64 rsvd:7; - u64 rid:16; - u64 pasid:20; - u64 exe_req:1; - u64 pm_req:1; - u64 rsvd2:10; - }; - u64 qw_0; - }; - union { - struct { - u64 rd_req:1; - u64 wr_req:1; - u64 lpig:1; - u64 prg_index:9; - u64 addr:52; - }; - u64 qw_1; - }; - u64 qw_2; - u64 qw_3; -}; - -static bool is_canonical_address(u64 addr) -{ - int shift =3D 64 - (__VIRTUAL_MASK_SHIFT + 1); - long saddr =3D (long) addr; - - return (((saddr << shift) >> shift) =3D=3D saddr); -} - -/** - * intel_drain_pasid_prq - Drain page requests and responses for a pasid - * @dev: target device - * @pasid: pasid for draining - * - * Drain all pending page requests and responses related to @pasid in both - * software and hardware. This is supposed to be called after the device - * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB - * and DevTLB have been invalidated. - * - * It waits until all pending page requests for @pasid in the page fault - * queue are completed by the prq handling thread. Then follow the steps - * described in VT-d spec CH7.10 to drain all page requests and page - * responses pending in the hardware. 
- */ -void intel_drain_pasid_prq(struct device *dev, u32 pasid) -{ - struct device_domain_info *info; - struct dmar_domain *domain; - struct intel_iommu *iommu; - struct qi_desc desc[3]; - struct pci_dev *pdev; - int head, tail; - u16 sid, did; - int qdep; - - info =3D dev_iommu_priv_get(dev); - if (WARN_ON(!info || !dev_is_pci(dev))) - return; - - if (!info->pri_enabled) - return; - - iommu =3D info->iommu; - domain =3D info->domain; - pdev =3D to_pci_dev(dev); - sid =3D PCI_DEVID(info->bus, info->devfn); - did =3D domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID; - qdep =3D pci_ats_queue_depth(pdev); - - /* - * Check and wait until all pending page requests in the queue are - * handled by the prq handling thread. - */ -prq_retry: - reinit_completion(&iommu->prq_complete); - tail =3D dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; - head =3D dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; - while (head !=3D tail) { - struct page_req_dsc *req; - - req =3D &iommu->prq[head / sizeof(*req)]; - if (!req->pasid_present || req->pasid !=3D pasid) { - head =3D (head + sizeof(*req)) & PRQ_RING_MASK; - continue; - } - - wait_for_completion(&iommu->prq_complete); - goto prq_retry; - } - - iopf_queue_flush_dev(dev); - - /* - * Perform steps described in VT-d spec CH7.10 to drain page - * requests and responses in hardware. 
- */ - memset(desc, 0, sizeof(desc)); - desc[0].qw0 =3D QI_IWD_STATUS_DATA(QI_DONE) | - QI_IWD_FENCE | - QI_IWD_TYPE; - desc[1].qw0 =3D QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc[2].qw0 =3D QI_DEV_EIOTLB_PASID(pasid) | - QI_DEV_EIOTLB_SID(sid) | - QI_DEV_EIOTLB_QDEP(qdep) | - QI_DEIOTLB_TYPE | - QI_DEV_IOTLB_PFSID(info->pfsid); -qi_retry: - reinit_completion(&iommu->prq_complete); - qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); - if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { - wait_for_completion(&iommu->prq_complete); - goto qi_retry; - } -} - -static int prq_to_iommu_prot(struct page_req_dsc *req) -{ - int prot =3D 0; - - if (req->rd_req) - prot |=3D IOMMU_FAULT_PERM_READ; - if (req->wr_req) - prot |=3D IOMMU_FAULT_PERM_WRITE; - if (req->exe_req) - prot |=3D IOMMU_FAULT_PERM_EXEC; - if (req->pm_req) - prot |=3D IOMMU_FAULT_PERM_PRIV; - - return prot; -} - -static void intel_svm_prq_report(struct intel_iommu *iommu, struct device = *dev, - struct page_req_dsc *desc) -{ - struct iopf_fault event =3D { }; - - /* Fill in event data for device specific processing */ - event.fault.type =3D IOMMU_FAULT_PAGE_REQ; - event.fault.prm.addr =3D (u64)desc->addr << VTD_PAGE_SHIFT; - event.fault.prm.pasid =3D desc->pasid; - event.fault.prm.grpid =3D desc->prg_index; - event.fault.prm.perm =3D prq_to_iommu_prot(desc); - - if (desc->lpig) - event.fault.prm.flags |=3D IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; - if (desc->pasid_present) { - event.fault.prm.flags |=3D IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - event.fault.prm.flags |=3D IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; - } - - iommu_report_device_fault(dev, &event); -} - -static void handle_bad_prq_event(struct intel_iommu *iommu, - struct page_req_dsc *req, int result) -{ - struct qi_desc desc =3D { }; - - pr_err("%s: Invalid page request: %08llx %08llx\n", - iommu->name, ((unsigned long long *)req)[0], - ((unsigned long long *)req)[1]); - - if 
(!req->lpig) - return; - - desc.qw0 =3D QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID(req->rid) | - QI_PGRP_PASID_P(req->pasid_present) | - QI_PGRP_RESP_CODE(result) | - QI_PGRP_RESP_TYPE; - desc.qw1 =3D QI_PGRP_IDX(req->prg_index) | - QI_PGRP_LPIG(req->lpig); - - qi_submit_sync(iommu, &desc, 1, 0); -} - -static irqreturn_t prq_event_thread(int irq, void *d) -{ - struct intel_iommu *iommu =3D d; - struct page_req_dsc *req; - int head, tail, handled; - struct device *dev; - u64 address; - - /* - * Clear PPR bit before reading head/tail registers, to ensure that - * we get a new interrupt if needed. - */ - writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); - - tail =3D dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; - head =3D dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; - handled =3D (head !=3D tail); - while (head !=3D tail) { - req =3D &iommu->prq[head / sizeof(*req)]; - address =3D (u64)req->addr << VTD_PAGE_SHIFT; - - if (unlikely(!req->pasid_present)) { - pr_err("IOMMU: %s: Page request without PASID\n", - iommu->name); -bad_req: - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); - goto prq_advance; - } - - if (unlikely(!is_canonical_address(address))) { - pr_err("IOMMU: %s: Address is not canonical\n", - iommu->name); - goto bad_req; - } - - if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { - pr_err("IOMMU: %s: Page request in Privilege Mode\n", - iommu->name); - goto bad_req; - } - - if (unlikely(req->exe_req && req->rd_req)) { - pr_err("IOMMU: %s: Execution request not supported\n", - iommu->name); - goto bad_req; - } - - /* Drop Stop Marker message. No need for a response. */ - if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) - goto prq_advance; - - /* - * If prq is to be handled outside iommu driver via receiver of - * the fault notifiers, we skip the page response here. 
- */ - mutex_lock(&iommu->iopf_lock); - dev =3D device_rbtree_find(iommu, req->rid); - if (!dev) { - mutex_unlock(&iommu->iopf_lock); - goto bad_req; - } - - intel_svm_prq_report(iommu, dev, req); - trace_prq_report(iommu, dev, req->qw_0, req->qw_1, - req->qw_2, req->qw_3, - iommu->prq_seq_number++); - mutex_unlock(&iommu->iopf_lock); -prq_advance: - head =3D (head + sizeof(*req)) & PRQ_RING_MASK; - } - - dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); - - /* - * Clear the page request overflow bit and wake up all threads that - * are waiting for the completion of this handling. - */ - if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { - pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", - iommu->name); - head =3D dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; - tail =3D dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; - if (head =3D=3D tail) { - iopf_queue_discard_partial(iommu->iopf_queue); - writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); - pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", - iommu->name); - } - } - - if (!completion_done(&iommu->prq_complete)) - complete(&iommu->prq_complete); - - return IRQ_RETVAL(handled); -} - -void intel_svm_page_response(struct device *dev, struct iopf_fault *evt, - struct iommu_page_response *msg) -{ - struct device_domain_info *info =3D dev_iommu_priv_get(dev); - struct intel_iommu *iommu =3D info->iommu; - u8 bus =3D info->bus, devfn =3D info->devfn; - struct iommu_fault_page_request *prm; - struct qi_desc desc; - bool pasid_present; - bool last_page; - u16 sid; - - prm =3D &evt->fault.prm; - sid =3D PCI_DEVID(bus, devfn); - pasid_present =3D prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - last_page =3D prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; - - desc.qw0 =3D QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | - QI_PGRP_PASID_P(pasid_present) | - QI_PGRP_RESP_CODE(msg->code) | - QI_PGRP_RESP_TYPE; - desc.qw1 =3D QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); - desc.qw2 =3D 0; - 
desc.qw3 =3D 0; - - qi_submit_sync(iommu, &desc, 1, 0); -} - static void intel_svm_domain_free(struct iommu_domain *domain) { struct dmar_domain *dmar_domain =3D to_dmar_domain(domain); --=20 2.43.0