The SMMU engine on HIP09 chip has a hardware issue.
SMMU pagetable prefetch features may prefetch and use a invalid PTE
even the PTE is valid at that time. This will cause the device trigger
fake pagefaults. The solution is to avoid prefetching by adding a
SYNC command when smmu mapping a iova. But the performance of nic has a
sharp drop. Then we do this workaround, always enable tx bounce buffer,
avoid mapping/unmapping on TX path.
This issue only affects HNS3, so we always enable
tx bounce buffer when smmu enabled to improve performance.
Fixes: 295ba232a8c3 ("net: hns3: add device version to replace pci revision")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 +++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++
.../ethernet/hisilicon/hns3/hns3_ethtool.c | 33 +++++++++++++++++++
3 files changed, 66 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 49fcee7a6d0f..b028ca9a67a5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -11,6 +11,7 @@
#include <linux/irq.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
@@ -1039,6 +1040,8 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring,
static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
{
u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size;
+ struct net_device *netdev = ring_to_netdev(ring);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hns3_tx_spare *tx_spare;
struct page *page;
dma_addr_t dma;
@@ -1080,6 +1083,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
tx_spare->buf = page_address(page);
tx_spare->len = PAGE_SIZE << order;
ring->tx_spare = tx_spare;
+ ring->tx_copybreak = priv->tx_copybreak;
return;
dma_mapping_error:
@@ -4874,6 +4878,30 @@ static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
devm_kfree(&pdev->dev, priv->tqp_vector);
}
+static void hns3_update_tx_spare_buf_config(struct hns3_nic_priv *priv)
+{
+#define HNS3_MIN_SPARE_BUF_SIZE (2 * 1024 * 1024)
+#define HNS3_MAX_PACKET_SIZE (64 * 1024)
+
+ struct iommu_domain *domain = iommu_get_domain_for_dev(priv->dev);
+ struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(priv->ae_handle);
+ struct hnae3_handle *handle = priv->ae_handle;
+
+ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3)
+ return;
+
+ if (!(domain && iommu_is_dma_domain(domain)))
+ return;
+
+ priv->min_tx_copybreak = HNS3_MAX_PACKET_SIZE;
+ priv->min_tx_spare_buf_size = HNS3_MIN_SPARE_BUF_SIZE;
+
+ if (priv->tx_copybreak < priv->min_tx_copybreak)
+ priv->tx_copybreak = priv->min_tx_copybreak;
+ if (handle->kinfo.tx_spare_buf_size < priv->min_tx_spare_buf_size)
+ handle->kinfo.tx_spare_buf_size = priv->min_tx_spare_buf_size;
+}
+
static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
unsigned int ring_type)
{
@@ -5107,6 +5135,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv)
int i, j;
int ret;
+ hns3_update_tx_spare_buf_config(priv);
for (i = 0; i < ring_num; i++) {
ret = hns3_alloc_ring_memory(&priv->ring[i]);
if (ret) {
@@ -5311,6 +5340,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
priv->ae_handle = handle;
priv->tx_timeout_count = 0;
priv->max_non_tso_bd_num = ae_dev->dev_specs.max_non_tso_bd_num;
+ priv->min_tx_copybreak = 0;
+ priv->min_tx_spare_buf_size = 0;
set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index d3bad5d1b888..933e3527ed82 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -596,6 +596,8 @@ struct hns3_nic_priv {
struct hns3_enet_coalesce rx_coal;
u32 tx_copybreak;
u32 rx_copybreak;
+ u32 min_tx_copybreak;
+ u32 min_tx_spare_buf_size;
};
union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index d5454e126c85..a752d0e3db3a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1927,6 +1927,31 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev,
return ret;
}
+static int hns3_check_tx_copybreak(struct net_device *netdev, u32 copybreak)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ if (copybreak < priv->min_tx_copybreak) {
+ netdev_err(netdev, "tx copybreak %u should be no less than %u!\n",
+ copybreak, priv->min_tx_copybreak);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int hns3_check_tx_spare_buf_size(struct net_device *netdev, u32 buf_size)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ if (buf_size < priv->min_tx_spare_buf_size) {
+ netdev_err(netdev,
+ "tx spare buf size %u should be no less than %u!\n",
+ buf_size, priv->min_tx_spare_buf_size);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int hns3_set_tunable(struct net_device *netdev,
const struct ethtool_tunable *tuna,
const void *data)
@@ -1943,6 +1968,10 @@ static int hns3_set_tunable(struct net_device *netdev,
switch (tuna->id) {
case ETHTOOL_TX_COPYBREAK:
+ ret = hns3_check_tx_copybreak(netdev, *(u32 *)data);
+ if (ret)
+ return ret;
+
priv->tx_copybreak = *(u32 *)data;
for (i = 0; i < h->kinfo.num_tqps; i++)
@@ -1957,6 +1986,10 @@ static int hns3_set_tunable(struct net_device *netdev,
break;
case ETHTOOL_TX_COPYBREAK_BUF_SIZE:
+ ret = hns3_check_tx_spare_buf_size(netdev, *(u32 *)data);
+ if (ret)
+ return ret;
+
old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size;
new_tx_spare_buf_size = *(u32 *)data;
netdev_info(netdev, "request to set tx spare buf size from %u to %u\n",
--
2.33.0
On Wed, Jul 02, 2025 at 09:09:01PM +0800, Jijie Shao wrote: > The SMMU engine on HIP09 chip has a hardware issue. > SMMU pagetable prefetch features may prefetch and use a invalid PTE > even the PTE is valid at that time. This will cause the device trigger > fake pagefaults. The solution is to avoid prefetching by adding a > SYNC command when smmu mapping a iova. But the performance of nic has a > sharp drop. Then we do this workaround, always enable tx bounce buffer, > avoid mapping/unmapping on TX path. > > This issue only affects HNS3, so we always enable > tx bounce buffer when smmu enabled to improve performance. > > Fixes: 295ba232a8c3 ("net: hns3: add device version to replace pci revision") The cited commit may be a pre-requisite for this patch to check HNAE3_DEVICE_VERSION_V3. But it seems to me that the problem being addressed existed before the cited commit. If so, I think a different Fixes tag is appropriate. > Signed-off-by: Jian Shen <shenjian15@huawei.com> > Signed-off-by: Jijie Shao <shaojijie@huawei.com> > --- > .../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 +++++++++++++++++ > .../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ > .../ethernet/hisilicon/hns3/hns3_ethtool.c | 33 +++++++++++++++++++ It seems to me that the hns3_ethtool.c changes a) are not a requirement for the work-around introduced by this patch and b) may introduce complex behaviour between the effect of ethtool copybreak settings and the enablement/non-enablement of the work-around. Would it be possible to make a more minimal fix for net, that omits the ethtool changes. And then follow-up with them for net-next once the fix is present there? -- pw-bot: cr
on 2025/7/5 0:31, Simon Horman wrote: > On Wed, Jul 02, 2025 at 09:09:01PM +0800, Jijie Shao wrote: >> The SMMU engine on HIP09 chip has a hardware issue. >> SMMU pagetable prefetch features may prefetch and use a invalid PTE >> even the PTE is valid at that time. This will cause the device trigger >> fake pagefaults. The solution is to avoid prefetching by adding a >> SYNC command when smmu mapping a iova. But the performance of nic has a >> sharp drop. Then we do this workaround, always enable tx bounce buffer, >> avoid mapping/unmapping on TX path. >> >> This issue only affects HNS3, so we always enable >> tx bounce buffer when smmu enabled to improve performance. >> >> Fixes: 295ba232a8c3 ("net: hns3: add device version to replace pci revision") > The cited commit may be a pre-requisite for this patch > to check HNAE3_DEVICE_VERSION_V3. But it seems to me that the problem > being addressed existed before the cited commit. If so, I think a different > Fixes tag is appropriate. > >> Signed-off-by: Jian Shen <shenjian15@huawei.com> >> Signed-off-by: Jijie Shao <shaojijie@huawei.com> >> --- >> .../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 +++++++++++++++++ >> .../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ >> .../ethernet/hisilicon/hns3/hns3_ethtool.c | 33 +++++++++++++++++++ > It seems to me that the hns3_ethtool.c changes a) are not a requirement > for the work-around introduced by this patch and b) may introduce > complex behaviour between the effect of ethtool copybreak settings > and the enablement/non-enablement of the work-around. > > Would it be possible to make a more minimal fix for net, that > omits the ethtool changes. And then follow-up with them for net-next > once the fix is present there? I will discuss your suggestion with my team. Thanks Jijie Shao
On Tue, Jul 08, 2025 at 05:41:36PM +0800, Jijie Shao wrote: > > on 2025/7/5 0:31, Simon Horman wrote: > > On Wed, Jul 02, 2025 at 09:09:01PM +0800, Jijie Shao wrote: > > > The SMMU engine on HIP09 chip has a hardware issue. > > > SMMU pagetable prefetch features may prefetch and use a invalid PTE > > > even the PTE is valid at that time. This will cause the device trigger > > > fake pagefaults. The solution is to avoid prefetching by adding a > > > SYNC command when smmu mapping a iova. But the performance of nic has a > > > sharp drop. Then we do this workaround, always enable tx bounce buffer, > > > avoid mapping/unmapping on TX path. > > > > > > This issue only affects HNS3, so we always enable > > > tx bounce buffer when smmu enabled to improve performance. > > > > > > Fixes: 295ba232a8c3 ("net: hns3: add device version to replace pci revision") > > The cited commit may be a pre-requisite for this patch > > to check HNAE3_DEVICE_VERSION_V3. But it seems to me that the problem > > being addressed existed before the cited commit. If so, I think a different > > Fixes tag is appropriate. > > > > > Signed-off-by: Jian Shen <shenjian15@huawei.com> > > > Signed-off-by: Jijie Shao <shaojijie@huawei.com> > > > --- > > > .../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 +++++++++++++++++ > > > .../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ > > > .../ethernet/hisilicon/hns3/hns3_ethtool.c | 33 +++++++++++++++++++ > > It seems to me that the hns3_ethtool.c changes a) are not a requirement > > for the work-around introduced by this patch and b) may introduce > > complex behaviour between the effect of ethtool copybreak settings > > and the enablement/non-enablement of the work-around. > > > > Would it be possible to make a more minimal fix for net, that > > omits the ethtool changes. And then follow-up with them for net-next > > once the fix is present there? > > I will discuss your suggestion with my team. Thanks, much appreciated.
© 2016 - 2025 Red Hat, Inc.