 drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c   | 5 ++++-
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 5 +++--
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c       | 1 +
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c      | 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c    | 2 --
 5 files changed, 9 insertions(+), 5 deletions(-)
From: Tan En De <ende.tan@starfivetech.com>
Currently, some set_rx_owner() callbacks set the interrupt-on-completion
bit in addition to the OWN bit, without inserting a dma_wmb() barrier in
between. This might cause a missed interrupt if the DMA sees the OWN bit
before the interrupt-on-completion bit is set.
Thus, this patch adds a dma_wmb() barrier right before setting the OWN bit
in each of the callbacks. Now that the responsibility of calling dma_wmb()
is delegated to the callbacks, let's simplify the main driver code by
removing the dma_wmb() before stmmac_set_rx_owner().
Signed-off-by: Tan En De <ende.tan@starfivetech.com>
---
v2:
- Avoid introducing a new function just to set the interrupt-on-completion
bit, as it is wasteful to do so.
- Delegate the responsibility of calling dma_wmb() from the main driver code
to the set_rx_owner() callbacks (i.e. let the callbacks manage the low-level
ordering/barrier rather than cluttering up the main driver code).
v1:
- https://patchwork.kernel.org/project/netdevbpf/patch/20240814092438.3129-1-ende.tan@starfivetech.com/
---
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 5 ++++-
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 5 +++--
drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 1 +
drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 --
5 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 1c5802e0d7f4..95aea6ad485b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -186,10 +186,13 @@ static void dwmac4_set_tx_owner(struct dma_desc *p)
static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
- p->des3 |= cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
+ p->des3 |= cpu_to_le32(RDES3_BUFFER1_VALID_ADDR);
if (!disable_rx_ic)
p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
+
+ dma_wmb();
+ p->des3 |= cpu_to_le32(RDES3_OWN);
}
static int dwmac4_get_tx_ls(struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index fc82862a612c..d76ae833c840 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -56,10 +56,11 @@ static void dwxgmac2_set_tx_owner(struct dma_desc *p)
static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
- p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
-
if (!disable_rx_ic)
p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC);
+
+ dma_wmb();
+ p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
}
static int dwxgmac2_get_tx_ls(struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 937b7a0466fc..9219fe69ea44 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -289,6 +289,7 @@ static void enh_desc_set_tx_owner(struct dma_desc *p)
static void enh_desc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
+ dma_wmb();
p->des0 |= cpu_to_le32(RDES0_OWN);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 68a7cfcb1d8f..d0b703a3346f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -155,6 +155,7 @@ static void ndesc_set_tx_owner(struct dma_desc *p)
static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
+ dma_wmb();
p->des0 |= cpu_to_le32(RDES0_OWN);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d9fca8d1227c..859a2c4c9e5c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4848,7 +4848,6 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
if (!priv->use_riwt)
use_rx_wd = false;
- dma_wmb();
stmmac_set_rx_owner(priv, p, use_rx_wd);
entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size);
@@ -5205,7 +5204,6 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
if (!priv->use_riwt)
use_rx_wd = false;
- dma_wmb();
stmmac_set_rx_owner(priv, rx_desc, use_rx_wd);
entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size);
--
2.34.1
Hi Tan
On Wed, Aug 21, 2024 at 02:03:07PM +0800, ende.tan@starfivetech.com wrote:
> From: Tan En De <ende.tan@starfivetech.com>
>
> Currently, some set_rx_owner() callbacks set interrupt-on-completion bit
> in addition to OWN bit, without inserting a dma_wmb() barrier in
> between. This might cause missed interrupt if the DMA sees the OWN bit
> before the interrupt-on-completion bit is set.
>
> Thus, this patch adds dma_wmb() barrier right before setting OWN bit in
> each of the callbacks. Now that the responsibility of calling dma_wmb()
> is delegated to the callbacks, let's simplify main driver code by
> removing dma_wmb() before stmmac_set_rx_owner().
>
> Signed-off-by: Tan En De <ende.tan@starfivetech.com>
> ---
> v2:
> - Avoid introducing a new function just to set the interrupt-on-completion
> bit, as it is wasteful to do so.
> - Delegate the responsibility of calling dma_wmb() from main driver code
> to set_rx_owner() callbacks (i.e. let callbacks to manage the low-level
> ordering/barrier rather than cluttering up the main driver code).
> v1:
> - https://patchwork.kernel.org/project/netdevbpf/patch/20240814092438.3129-1-ende.tan@starfivetech.com/
> ---
> drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 5 ++++-
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 5 +++--
> drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 1 +
> drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 1 +
> drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 --
> 5 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> index 1c5802e0d7f4..95aea6ad485b 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> @@ -186,10 +186,13 @@ static void dwmac4_set_tx_owner(struct dma_desc *p)
>
> static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
> {
> - p->des3 |= cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
> + p->des3 |= cpu_to_le32(RDES3_BUFFER1_VALID_ADDR);
>
> if (!disable_rx_ic)
> p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
> +
> + dma_wmb();
> + p->des3 |= cpu_to_le32(RDES3_OWN);
> }
>
> static int dwmac4_get_tx_ls(struct dma_desc *p)
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> index fc82862a612c..d76ae833c840 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> @@ -56,10 +56,11 @@ static void dwxgmac2_set_tx_owner(struct dma_desc *p)
>
> static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
> {
> - p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
> -
> if (!disable_rx_ic)
> p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC);
> +
> + dma_wmb();
> + p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
I am not against moving the barrier here, but I really don't see a firm
reason why you can't collect the flags in a local variable and
then flush them out to the DES3 field.
Getting back to your discussion with Andrew:
https://lore.kernel.org/netdev/06297829-0bf7-4a06-baaf-e32c39888947@lunn.ch/
you said:
> I didn't use local variable because I worry about CPU out-of-order execution.
> For example,
> ```
> local_var = (INT_ON_COMPLETION | OWN)
> des3 |= local_var
> ```
> CPU optimization might result in this
> ```
> des3 |= INT_ON_COMPLETION
> des3 |= OWN
> ```
> or worst, out of order like this
> ```
> des3 |= OWN
> des3 |= INT_ON_COMPLETION
> ```
Why do you think the CPU would split the write of a pre-initialized local
variable into a two-stage write?
Anyway, Andrew is right about the nature of the descriptor memory. It's
coherent memory, to which access is expensive and should be
minimized as much as possible.
-Serge(y)
> }
>
> static int dwxgmac2_get_tx_ls(struct dma_desc *p)
> diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
> index 937b7a0466fc..9219fe69ea44 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
> @@ -289,6 +289,7 @@ static void enh_desc_set_tx_owner(struct dma_desc *p)
>
> static void enh_desc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
> {
> + dma_wmb();
> p->des0 |= cpu_to_le32(RDES0_OWN);
> }
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
> index 68a7cfcb1d8f..d0b703a3346f 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
> @@ -155,6 +155,7 @@ static void ndesc_set_tx_owner(struct dma_desc *p)
>
> static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
> {
> + dma_wmb();
> p->des0 |= cpu_to_le32(RDES0_OWN);
> }
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index d9fca8d1227c..859a2c4c9e5c 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -4848,7 +4848,6 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
> if (!priv->use_riwt)
> use_rx_wd = false;
>
> - dma_wmb();
> stmmac_set_rx_owner(priv, p, use_rx_wd);
>
> entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size);
> @@ -5205,7 +5204,6 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
> if (!priv->use_riwt)
> use_rx_wd = false;
>
> - dma_wmb();
> stmmac_set_rx_owner(priv, rx_desc, use_rx_wd);
>
> entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size);
> --
> 2.34.1
>
>
On Wed, Aug 21, 2024 at 8:03 AM <ende.tan@starfivetech.com> wrote:
>
> From: Tan En De <ende.tan@starfivetech.com>
>
> Currently, some set_rx_owner() callbacks set interrupt-on-completion bit
> in addition to OWN bit, without inserting a dma_wmb() barrier in
> between. This might cause missed interrupt if the DMA sees the OWN bit
> before the interrupt-on-completion bit is set.
>
> Thus, this patch adds dma_wmb() barrier right before setting OWN bit in
> each of the callbacks. Now that the responsibility of calling dma_wmb()
> is delegated to the callbacks, let's simplify main driver code by
> removing dma_wmb() before stmmac_set_rx_owner().
>
> Signed-off-by: Tan En De <ende.tan@starfivetech.com>
> ---
> v2:
> - Avoid introducing a new function just to set the interrupt-on-completion
> bit, as it is wasteful to do so.
> - Delegate the responsibility of calling dma_wmb() from main driver code
> to set_rx_owner() callbacks (i.e. let callbacks to manage the low-level
> ordering/barrier rather than cluttering up the main driver code).
> v1:
> - https://patchwork.kernel.org/project/netdevbpf/patch/20240814092438.3129-1-ende.tan@starfivetech.com/
> ---
> drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 5 ++++-
> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 5 +++--
> drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 1 +
> drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 1 +
> drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 --
> 5 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> index 1c5802e0d7f4..95aea6ad485b 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
> @@ -186,10 +186,13 @@ static void dwmac4_set_tx_owner(struct dma_desc *p)
>
> static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
> {
> - p->des3 |= cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
> + p->des3 |= cpu_to_le32(RDES3_BUFFER1_VALID_ADDR);
>
> if (!disable_rx_ic)
> p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
RDES3_INT_ON_COMPLETION_EN was possibly written after RDES3_OWN.
This looks like a bug fix to me; it should target the net tree.
> +
> + dma_wmb();
> + p->des3 |= cpu_to_le32(RDES3_OWN);
> }
>
> static int dwmac4_get_tx_ls(struct dma_desc *p)
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> index fc82862a612c..d76ae833c840 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
> @@ -56,10 +56,11 @@ static void dwxgmac2_set_tx_owner(struct dma_desc *p)
>
> static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
> {
> - p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
> -
> if (!disable_rx_ic)
> p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC);
Same kind of bug here.
> +
> + dma_wmb();
> + p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
> }
© 2016 - 2026 Red Hat, Inc.