From nobody Mon Nov 25 20:38:29 2024 Received: from szxga05-in.huawei.com (szxga05-in.huawei.com [45.249.212.191]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 58AA81D27A4; Fri, 25 Oct 2024 09:36:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=45.249.212.191 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729848987; cv=none; b=jY72zUIiXkqRGT0hoHn7gmD0n58JgrlSJk0fd53Ei93++cFQz0IJIn+VVYxBYjjvUVloB3Ad/6q6h2ZDWOLoBJ8IaR8F/ujpOEm1+N5puTZuXiESA0HkvifjI4pBZq7zfcuetUvJzJnVCbh2qrp/jGhW/MdhK9crXg6CXGa/O9k= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729848987; c=relaxed/simple; bh=vqyAYGKGo1O3by185swqyg05OnYozT8QSJOu/jtU96Y=; h=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=bxb6qjQ+712407jS/lQTCrpPDg275xNT24K0cku/AEdoRb3dWG7lsDWXmz0Q5U/XktCdj5aW1G7Aq1LCPkLcrsmoTvpvlvNggdwHsC8zGdp82aUNho77kfTC9AxcUVpB6TWQCVRqmoUA6geBk+YEdRUuVLArBsz2pi6BZEcwrEw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=huawei.com; spf=pass smtp.mailfrom=huawei.com; arc=none smtp.client-ip=45.249.212.191 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=huawei.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=huawei.com Received: from mail.maildlp.com (unknown [172.19.163.17]) by szxga05-in.huawei.com (SkyGuard) with ESMTP id 4XZd0v4Rg9z1jvr4; Fri, 25 Oct 2024 17:34:55 +0800 (CST) Received: from kwepemm000007.china.huawei.com (unknown [7.193.23.189]) by mail.maildlp.com (Postfix) with ESMTPS id 1B3E81A0188; Fri, 25 Oct 2024 17:36:21 +0800 (CST) Received: from localhost.localdomain (10.90.30.45) by kwepemm000007.china.huawei.com (7.193.23.189) with Microsoft SMTP Server (version=TLS1_2, 
cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.1.2507.39; Fri, 25 Oct 2024 17:36:20 +0800 From: Jijie Shao To: , , , , , CC: , , , , , , , Subject: [PATCH V3 net 1/9] net: hns3: default enable tx bounce buffer when smmu enabled Date: Fri, 25 Oct 2024 17:29:30 +0800 Message-ID: <20241025092938.2912958-2-shaojijie@huawei.com> X-Mailer: git-send-email 2.30.0 In-Reply-To: <20241025092938.2912958-1-shaojijie@huawei.com> References: <20241025092938.2912958-1-shaojijie@huawei.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-ClientProxiedBy: dggems702-chm.china.huawei.com (10.3.19.179) To kwepemm000007.china.huawei.com (7.193.23.189) Content-Type: text/plain; charset="utf-8" From: Peiyang Wang The SMMU engine on the HIP09 chip has a hardware issue. The SMMU pagetable prefetch feature may prefetch and use an invalid PTE even though the PTE is valid at that time. This causes the device to trigger fake page faults. The solution is to avoid prefetching by adding a SYNC command when the SMMU maps an IOVA, but this causes a sharp drop in NIC performance. As a workaround, always enable the tx bounce buffer to avoid mapping/unmapping on the TX path. This issue only affects HNS3, so we always enable the tx bounce buffer when the SMMU is enabled to improve performance. 
Fixes: 295ba232a8c3 ("net: hns3: add device version to replace pci revision= ") Signed-off-by: Peiyang Wang Signed-off-by: Jian Shen Signed-off-by: Jijie Shao --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 +++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ .../ethernet/hisilicon/hns3/hns3_ethtool.c | 33 +++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/= ethernet/hisilicon/hns3/hns3_enet.c index 4cbc4d069a1f..ac88e301f221 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1032,6 +1033,8 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring= *ring, static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) { u32 alloc_size =3D ring->tqp->handle->kinfo.tx_spare_buf_size; + struct net_device *netdev =3D ring_to_netdev(ring); + struct hns3_nic_priv *priv =3D netdev_priv(netdev); struct hns3_tx_spare *tx_spare; struct page *page; dma_addr_t dma; @@ -1073,6 +1076,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_ene= t_ring *ring) tx_spare->buf =3D page_address(page); tx_spare->len =3D PAGE_SIZE << order; ring->tx_spare =3D tx_spare; + ring->tx_copybreak =3D priv->tx_copybreak; return; =20 dma_mapping_error: @@ -4868,6 +4872,30 @@ static void hns3_nic_dealloc_vector_data(struct hns3= _nic_priv *priv) devm_kfree(&pdev->dev, priv->tqp_vector); } =20 +static void hns3_update_tx_spare_buf_config(struct hns3_nic_priv *priv) +{ +#define HNS3_MIN_SPARE_BUF_SIZE (2 * 1024 * 1024) +#define HNS3_MAX_PACKET_SIZE (64 * 1024) + + struct iommu_domain *domain =3D iommu_get_domain_for_dev(priv->dev); + struct hnae3_ae_dev *ae_dev =3D hns3_get_ae_dev(priv->ae_handle); + struct hnae3_handle *handle =3D priv->ae_handle; + + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) + return; + + if (!(domain && 
iommu_is_dma_domain(domain))) + return; + + priv->min_tx_copybreak =3D HNS3_MAX_PACKET_SIZE; + priv->min_tx_spare_buf_size =3D HNS3_MIN_SPARE_BUF_SIZE; + + if (priv->tx_copybreak < priv->min_tx_copybreak) + priv->tx_copybreak =3D priv->min_tx_copybreak; + if (handle->kinfo.tx_spare_buf_size < priv->min_tx_spare_buf_size) + handle->kinfo.tx_spare_buf_size =3D priv->min_tx_spare_buf_size; +} + static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv = *priv, unsigned int ring_type) { @@ -5101,6 +5129,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) int i, j; int ret; =20 + hns3_update_tx_spare_buf_config(priv); for (i =3D 0; i < ring_num; i++) { ret =3D hns3_alloc_ring_memory(&priv->ring[i]); if (ret) { @@ -5305,6 +5334,8 @@ static int hns3_client_init(struct hnae3_handle *hand= le) priv->ae_handle =3D handle; priv->tx_timeout_count =3D 0; priv->max_non_tso_bd_num =3D ae_dev->dev_specs.max_non_tso_bd_num; + priv->min_tx_copybreak =3D 0; + priv->min_tx_spare_buf_size =3D 0; set_bit(HNS3_NIC_STATE_DOWN, &priv->state); =20 handle->msg_enable =3D netif_msg_init(debug, DEFAULT_MSG_LEVEL); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/= ethernet/hisilicon/hns3/hns3_enet.h index d36c4ed16d8d..caf7a4df8585 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -596,6 +596,8 @@ struct hns3_nic_priv { struct hns3_enet_coalesce rx_coal; u32 tx_copybreak; u32 rx_copybreak; + u32 min_tx_copybreak; + u32 min_tx_spare_buf_size; }; =20 union l3_hdr_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/n= et/ethernet/hisilicon/hns3/hns3_ethtool.c index b1e988347347..97eaeec1952b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1933,6 +1933,31 @@ static int hns3_set_tx_spare_buf_size(struct net_dev= ice *netdev, return ret; } =20 +static int 
hns3_check_tx_copybreak(struct net_device *netdev, u32 copybrea= k) +{ + struct hns3_nic_priv *priv =3D netdev_priv(netdev); + + if (copybreak < priv->min_tx_copybreak) { + netdev_err(netdev, "tx copybreak %u should be no less than %u!\n", + copybreak, priv->min_tx_copybreak); + return -EINVAL; + } + return 0; +} + +static int hns3_check_tx_spare_buf_size(struct net_device *netdev, u32 buf= _size) +{ + struct hns3_nic_priv *priv =3D netdev_priv(netdev); + + if (buf_size < priv->min_tx_spare_buf_size) { + netdev_err(netdev, + "tx spare buf size %u should be no less than %u!\n", + buf_size, priv->min_tx_spare_buf_size); + return -EINVAL; + } + return 0; +} + static int hns3_set_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, const void *data) @@ -1949,6 +1974,10 @@ static int hns3_set_tunable(struct net_device *netde= v, =20 switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: + ret =3D hns3_check_tx_copybreak(netdev, *(u32 *)data); + if (ret) + return ret; + priv->tx_copybreak =3D *(u32 *)data; =20 for (i =3D 0; i < h->kinfo.num_tqps; i++) @@ -1963,6 +1992,10 @@ static int hns3_set_tunable(struct net_device *netde= v, =20 break; case ETHTOOL_TX_COPYBREAK_BUF_SIZE: + ret =3D hns3_check_tx_spare_buf_size(netdev, *(u32 *)data); + if (ret) + return ret; + old_tx_spare_buf_size =3D h->kinfo.tx_spare_buf_size; new_tx_spare_buf_size =3D *(u32 *)data; netdev_info(netdev, "request to set tx spare buf size from %u to %u\n", --=20 2.33.0