This patch provides callbacks for struct net_device_ops for MTIP
L2 switch.
Signed-off-by: Lukasz Majewski <lukasz.majewski@mailbox.org>
---
Changes for v13:
- New patch - created by excluding some code from large (i.e. v12 and
earlier) MTIP driver
Changes for v14:
- Add read memory barrier (rmb) before reading current descriptor
- Use proper locking primitives
Changes for v15:
- None
Changes for v16:
- Enable MTIP ports to support bridge offloading
- Use dev_err_ratelimited() instead of plain dev_err()
- Move skb storage and tx ring buffer modifications after
dma mapping code.
- Do not increase tx_errors when frames are dropped after
failed dma_mapping.
- Refactor the code for better readability
- Remove legacy call to netif_trans_update()
- Remove not needed rmb() - synchronized data read already assured by
coherent DMA allocation
- Replace spin_{un}lock() with _bh variant
Changes for v17:
- Add missing _bh() variant of spin_unlock
- Avoid reverse Christmas tree ordering in swap_buffer()
- Print error message after unlock
- Add DO_ONCE() and a separate function to print state of switch HW
- Remove dev->stats.tx_errors++
Changes for v18 - v19:
- None
Changes for v20:
- Perform data swap on SKB data only when it is copied to a separate
buffer.
- Clean up the comment
- Stop both network interfaces' TX queues when no resources for
transmission available (uDMA0 descriptors)
- Do not use fep->skb_cur and fep->tx_full
Changes for v21 - v22:
- None
Changes for v23:
- Move cancel_work_sync(&priv->tx_timeout_work); to where it is handled
- Remove dev->stats.tx_errors++; as errors are already noted in net stack
---
.../net/ethernet/freescale/mtipsw/mtipl2sw.c | 284 ++++++++++++++++++
1 file changed, 284 insertions(+)
diff --git a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
index d209501a99b0..4c64681602d6 100644
--- a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
+++ b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
@@ -43,6 +43,15 @@
#include "mtipl2sw.h"
+static void swap_buffer(void *bufaddr, int len)
+{
+ unsigned int *buf = bufaddr;
+ int i;
+
+ for (i = 0; i < len; i += 4, buf++)
+ swab32s(buf);
+}
+
/* Set the last buffer to wrap */
static void mtip_set_last_buf_to_wrap(struct cbd_t *bdp)
{
@@ -463,6 +472,120 @@ static void mtip_config_switch(struct switch_enet_private *fep)
fep->hwp + ESW_IMR);
}
+static netdev_tx_t mtip_start_xmit_port(struct sk_buff *skb,
+ struct net_device *dev, int port)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+ struct switch_enet_private *fep = priv->fep;
+ unsigned short status;
+ unsigned int index;
+ struct cbd_t *bdp;
+ void *bufaddr;
+
+ spin_lock_bh(&fep->hw_lock);
+
+ if (!fep->link[0] && !fep->link[1]) {
+ /* Link is down or autonegotiation is in progress. */
+ mtip_netif_stop_queues(fep);
+ spin_unlock_bh(&fep->hw_lock);
+ return NETDEV_TX_BUSY;
+ }
+
+ /* Fill in a Tx ring entry */
+ bdp = fep->cur_tx;
+ status = bdp->cbd_sc;
+
+ if (status & BD_ENET_TX_READY) {
+ /* All transmit buffers are full. Bail out. */
+ mtip_netif_stop_queues(fep);
+ spin_unlock_bh(&fep->hw_lock);
+ dev_err_ratelimited(&fep->pdev->dev, "%s: tx queue full!.\n",
+ dev->name);
+ return NETDEV_TX_BUSY;
+ }
+
+ /* Clear all of the status flags */
+ status &= ~BD_ENET_TX_STATS;
+
+ /* Set buffer length and buffer pointer */
+ bufaddr = skb->data;
+ bdp->cbd_datlen = skb->len;
+
+ index = bdp - fep->tx_bd_base;
+ /* On some FEC implementations data must be aligned on
+ * 4-byte boundaries. Use bounce buffers to copy data
+ * and get it aligned.
+ */
+ if ((unsigned long)bufaddr & MTIP_ALIGNMENT ||
+ fep->quirks & FEC_QUIRK_SWAP_FRAME) {
+ memcpy(fep->tx_bounce[index], skb->data, skb->len);
+ bufaddr = fep->tx_bounce[index];
+
+ if (fep->quirks & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(bufaddr, skb->len);
+ }
+
+ /* Push the data cache so the CPM does not get stale memory
+ * data.
+ */
+ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+ MTIP_SWITCH_TX_FRSIZE,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr))) {
+ dev_err(&fep->pdev->dev,
+ "Failed to map descriptor tx buffer\n");
+ dev->stats.tx_dropped++;
+ dev_kfree_skb_any(skb);
+ goto err;
+ }
+
+ /* Save skb pointer. */
+ fep->tx_skbuff[index] = skb;
+
+ /* Send it on its way. Tell FEC it's ready, interrupt when done,
+ * it's the last BD of the frame, and to put the CRC on the end.
+ */
+
+ status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR | BD_ENET_TX_LAST |
+ BD_ENET_TX_TC);
+
+ /* Synchronize all descriptor writes */
+ wmb();
+ bdp->cbd_sc = status;
+
+ skb_tx_timestamp(skb);
+
+ dev->stats.tx_bytes += skb->len;
+ /* If this was the last BD in the ring,
+ * start at the beginning again.
+ */
+ if (status & BD_ENET_TX_WRAP)
+ bdp = fep->tx_bd_base;
+ else
+ bdp++;
+
+ fep->cur_tx = bdp;
+ /* When TX descriptors' ring buffer is full stop both interfaces */
+ if (fep->cur_tx == fep->dirty_tx)
+ mtip_netif_stop_queues(fep);
+
+ /* Trigger transmission start */
+ writel(MCF_ESW_TDAR_X_DES_ACTIVE, fep->hwp + ESW_TDAR);
+
+ err:
+ spin_unlock_bh(&fep->hw_lock);
+
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t mtip_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+
+ return mtip_start_xmit_port(skb, dev, priv->portnum);
+}
+
static void mtip_configure_enet_mii(struct switch_enet_private *fep, int port)
{
struct phy_device *phydev = fep->phy_dev[port - 1];
@@ -618,6 +741,73 @@ static void mtip_switch_restart(struct net_device *dev, int duplex0,
mtip_config_switch(fep);
}
+static void mtip_print_hw_state(struct net_device *dev)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+ struct switch_enet_private *fep = priv->fep;
+ struct cbd_t *bdp;
+ bool tx_full;
+ int i;
+
+ spin_lock_bh(&fep->hw_lock);
+ tx_full = fep->dirty_tx == fep->cur_tx &&
+ mtip_netif_queues_stopped(fep);
+
+ dev_info(&dev->dev, "%s: transmit timed out.\n", dev->name);
+ dev_info(&dev->dev,
+ "Ring data: cur_tx 0x%p%s, dirty_tx 0x%p cur_rx: 0x%p\n",
+ fep->cur_tx, tx_full ? " (full)" : "", fep->dirty_tx,
+ fep->cur_rx);
+
+ bdp = fep->tx_bd_base;
+ dev_info(&dev->dev, " tx: %u buffers\n", TX_RING_SIZE);
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ dev_info(&dev->dev, " 0x%p: %04x %04x %08x\n",
+ bdp, bdp->cbd_sc, bdp->cbd_datlen,
+ (int)bdp->cbd_bufaddr);
+ bdp++;
+ }
+
+ bdp = fep->rx_bd_base;
+ dev_info(&dev->dev, " rx: %lu buffers\n", RX_RING_SIZE);
+ for (i = 0 ; i < RX_RING_SIZE; i++) {
+ dev_info(&dev->dev, " 0x%p: %04x %04x %08x\n",
+ bdp, bdp->cbd_sc, bdp->cbd_datlen,
+ (int)bdp->cbd_bufaddr);
+ bdp++;
+ }
+ spin_unlock_bh(&fep->hw_lock);
+}
+
+static void mtip_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+
+ DO_ONCE(mtip_print_hw_state, dev);
+
+ schedule_work(&priv->tx_timeout_work);
+}
+
+static void mtip_timeout_work(struct work_struct *work)
+{
+ struct mtip_ndev_priv *priv =
+ container_of(work, struct mtip_ndev_priv, tx_timeout_work);
+ struct switch_enet_private *fep = priv->fep;
+ struct net_device *dev = priv->dev;
+
+ rtnl_lock();
+ if (netif_device_present(dev) || netif_running(dev)) {
+ napi_disable(&fep->napi);
+ netif_tx_lock_bh(dev);
+ mtip_switch_restart(dev, fep->full_duplex[0],
+ fep->full_duplex[1]);
+ netif_tx_wake_all_queues(dev);
+ netif_tx_unlock_bh(dev);
+ napi_enable(&fep->napi);
+ }
+ rtnl_unlock();
+}
+
static irqreturn_t mtip_interrupt(int irq, void *ptr_fep)
{
struct switch_enet_private *fep = ptr_fep;
@@ -1096,6 +1286,92 @@ static int mtip_close(struct net_device *dev)
return 0;
}
+#define FEC_HASH_BITS 6 /* #bits in hash */
+static void mtip_set_multicast_list(struct net_device *dev)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+ unsigned int hash_high = 0, hash_low = 0, crc;
+ struct switch_enet_private *fep = priv->fep;
+ void __iomem *enet_addr = fep->enet_addr;
+ struct netdev_hw_addr *ha;
+ unsigned char hash;
+
+ if (priv->portnum == 2)
+ enet_addr += MCF_ESW_ENET_PORT_OFFSET;
+
+ if (dev->flags & IFF_PROMISC) {
+ /* Promisc mode is required for switch - it is
+ * already enabled during driver's probe.
+ */
+ dev_dbg(&dev->dev, "%s: IFF_PROMISC\n", __func__);
+ return;
+ }
+
+ if (dev->flags & IFF_ALLMULTI) {
+ dev_dbg(&dev->dev, "%s: IFF_ALLMULTI\n", __func__);
+
+ /* Allow all multicast addresses */
+ writel(0xFFFFFFFF, enet_addr + MCF_FEC_GRP_HASH_TABLE_HIGH);
+ writel(0xFFFFFFFF, enet_addr + MCF_FEC_GRP_HASH_TABLE_LOW);
+
+ return;
+ }
+
+ netdev_for_each_mc_addr(ha, dev) {
+ /* Calculate crc32 value of mac address */
+ crc = ether_crc_le(dev->addr_len, ha->addr);
+
+ /* Only upper 6 bits (FEC_HASH_BITS) are used
+ * which point to specific bit in the hash registers
+ */
+ hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3F;
+
+ if (hash > 31)
+ hash_high |= 1 << (hash - 32);
+ else
+ hash_low |= 1 << hash;
+ }
+
+ writel(hash_high, enet_addr + MCF_FEC_GRP_HASH_TABLE_HIGH);
+ writel(hash_low, enet_addr + MCF_FEC_GRP_HASH_TABLE_LOW);
+}
+
+static int mtip_set_mac_address(struct net_device *dev, void *p)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+ struct switch_enet_private *fep = priv->fep;
+ void __iomem *enet_addr = fep->enet_addr;
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+ eth_hw_addr_set(dev, addr->sa_data);
+
+ if (priv->portnum == 2)
+ enet_addr += MCF_ESW_ENET_PORT_OFFSET;
+
+ writel(dev->dev_addr[3] | (dev->dev_addr[2] << 8) |
+ (dev->dev_addr[1] << 16) | (dev->dev_addr[0] << 24),
+ enet_addr + MCF_FEC_PALR);
+ writel((dev->dev_addr[5] << 16) | (dev->dev_addr[4] << 24),
+ enet_addr + MCF_FEC_PAUR);
+
+ return mtip_update_atable_static((unsigned char *)dev->dev_addr,
+ 7, 7, fep);
+}
+
+static int mtip_get_port_parent_id(struct net_device *ndev,
+ struct netdev_phys_item_id *ppid)
+{
+ struct mtip_ndev_priv *priv = netdev_priv(ndev);
+ struct switch_enet_private *fep = priv->fep;
+
+ ppid->id_len = sizeof(fep->mac[0]);
+ memcpy(&ppid->id, &fep->mac[0], ppid->id_len);
+
+ return 0;
+}
+
static const struct ethtool_ops mtip_ethtool_ops = {
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
@@ -1107,6 +1383,11 @@ static const struct ethtool_ops mtip_ethtool_ops = {
static const struct net_device_ops mtip_netdev_ops = {
.ndo_open = mtip_open,
.ndo_stop = mtip_close,
+ .ndo_start_xmit = mtip_start_xmit,
+ .ndo_set_rx_mode = mtip_set_multicast_list,
+ .ndo_tx_timeout = mtip_timeout,
+ .ndo_set_mac_address = mtip_set_mac_address,
+ .ndo_get_port_parent_id = mtip_get_port_parent_id,
};
bool mtip_is_switch_netdev_port(const struct net_device *ndev)
@@ -1165,6 +1446,7 @@ static void mtip_ndev_cleanup(struct switch_enet_private *fep)
for (i = 0; i < SWITCH_EPORT_NUMBER; i++) {
if (fep->ndev[i]) {
priv = netdev_priv(fep->ndev[i]);
+ cancel_work_sync(&priv->tx_timeout_work);
unregister_netdev(fep->ndev[i]);
free_netdev(fep->ndev[i]);
@@ -1210,6 +1492,8 @@ static int mtip_ndev_init(struct switch_enet_private *fep,
goto cleanup_created_ndev;
}
+ INIT_WORK(&priv->tx_timeout_work, mtip_timeout_work);
+
dev_dbg(&fep->ndev[i]->dev, "%s: MTIP eth L2 switch %pM\n",
fep->ndev[i]->name, fep->ndev[i]->dev_addr);
}
--
2.39.5
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: mtip: Add net_device_ops functions to the L2 switch driver
This patch provides callbacks for struct net_device_ops for MTIP L2
switch.
> diff --git a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
> index d209501a99b0..4c64681602d6 100644
> --- a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
> +++ b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
[ ... ]
> @@ -618,6 +741,73 @@ static void mtip_switch_restart(struct net_device *dev, int duplex0,
> mtip_config_switch(fep);
> }
>
> +static void mtip_print_hw_state(struct net_device *dev)
> +{
> + struct mtip_ndev_priv *priv = netdev_priv(dev);
> + struct switch_enet_private *fep = priv->fep;
> + struct cbd_t *bdp;
> + bool tx_full;
> + int i;
> +
> + spin_lock_bh(&fep->hw_lock);
> + tx_full = fep->dirty_tx == fep->cur_tx &&
> + mtip_netif_queues_stopped(fep);
> +
> + dev_info(&dev->dev, "%s: transmit timed out.\n", dev->name);
> + dev_info(&dev->dev,
> + "Ring data: cur_tx 0x%p%s, dirty_tx 0x%p cur_rx: 0x%p\n",
> + fep->cur_tx, tx_full ? " (full)" : "", fep->dirty_tx,
> + fep->cur_rx);
> +
> + bdp = fep->tx_bd_base;
> + dev_info(&dev->dev, " tx: %u buffers\n", TX_RING_SIZE);
> + for (i = 0; i < TX_RING_SIZE; i++) {
> + dev_info(&dev->dev, " 0x%p: %04x %04x %08x\n",
> + bdp, bdp->cbd_sc, bdp->cbd_datlen,
> + (int)bdp->cbd_bufaddr);
> + bdp++;
> + }
> +
> + bdp = fep->rx_bd_base;
> + dev_info(&dev->dev, " rx: %lu buffers\n", RX_RING_SIZE);
> + for (i = 0 ; i < RX_RING_SIZE; i++) {
> + dev_info(&dev->dev, " 0x%p: %04x %04x %08x\n",
> + bdp, bdp->cbd_sc, bdp->cbd_datlen,
> + (int)bdp->cbd_bufaddr);
> + bdp++;
> + }
> + spin_unlock_bh(&fep->hw_lock);
> +}
> +
> +static void mtip_timeout(struct net_device *dev, unsigned int txqueue)
> +{
> + struct mtip_ndev_priv *priv = netdev_priv(dev);
> +
> + DO_ONCE(mtip_print_hw_state, dev);
> +
> + schedule_work(&priv->tx_timeout_work);
^^^^
This isn't a regression, but as noted in your v22 response, timeouts are
already counted by the network stack and exposed per-queue in sysfs. Does
this driver need additional counting beyond what the stack provides?
> +}
> +
> +static void mtip_timeout_work(struct work_struct *work)
> +{
> + struct mtip_ndev_priv *priv =
> + container_of(work, struct mtip_ndev_priv, tx_timeout_work);
> + struct switch_enet_private *fep = priv->fep;
> + struct net_device *dev = priv->dev;
> +
> + rtnl_lock();
> + if (netif_device_present(dev) || netif_running(dev)) {
> + napi_disable(&fep->napi);
> + netif_tx_lock_bh(dev);
> + mtip_switch_restart(dev, fep->full_duplex[0],
> + fep->full_duplex[1]);
> + netif_tx_wake_all_queues(dev);
> + netif_tx_unlock_bh(dev);
> + napi_enable(&fep->napi);
> + }
> + rtnl_unlock();
> +}
[ ... ]
> @@ -1096,6 +1286,92 @@ static int mtip_close(struct net_device *dev)
> return 0;
> }
>
> +#define FEC_HASH_BITS 6 /* #bits in hash */
> +static void mtip_set_multicast_list(struct net_device *dev)
> +{
> + struct mtip_ndev_priv *priv = netdev_priv(dev);
> + unsigned int hash_high = 0, hash_low = 0, crc;
> + struct switch_enet_private *fep = priv->fep;
> + void __iomem *enet_addr = fep->enet_addr;
> + struct netdev_hw_addr *ha;
> + unsigned char hash;
> +
> + if (priv->portnum == 2)
> + enet_addr += MCF_ESW_ENET_PORT_OFFSET;
> +
> + if (dev->flags & IFF_PROMISC) {
> + /* Promisc mode is required for switch - it is
> + * already enabled during driver's probe.
> + */
> + dev_dbg(&dev->dev, "%s: IFF_PROMISC\n", __func__);
> + return;
> + }
> +
> + if (dev->flags & IFF_ALLMULTI) {
> + dev_dbg(&dev->dev, "%s: IFF_ALLMULTI\n", __func__);
> +
> + /* Allow all multicast addresses */
> + writel(0xFFFFFFFF, enet_addr + MCF_FEC_GRP_HASH_TABLE_HIGH);
> + writel(0xFFFFFFFF, enet_addr + MCF_FEC_GRP_HASH_TABLE_LOW);
> +
> + return;
> + }
> +
> + netdev_for_each_mc_addr(ha, dev) {
> + /* Calculate crc32 value of mac address */
> + crc = ether_crc_le(dev->addr_len, ha->addr);
> +
> + /* Only upper 6 bits (FEC_HASH_BITS) are used
> + * which point to specific bit in the hash registers
> + */
> + hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3F;
> +
> + if (hash > 31)
> + hash_high |= 1 << (hash - 32);
> + else
> + hash_low |= 1 << hash;
> + }
> +
> + writel(hash_high, enet_addr + MCF_FEC_GRP_HASH_TABLE_HIGH);
> + writel(hash_low, enet_addr + MCF_FEC_GRP_HASH_TABLE_LOW);
> +}
> +
> +static int mtip_set_mac_address(struct net_device *dev, void *p)
> +{
> + struct mtip_ndev_priv *priv = netdev_priv(dev);
> + struct switch_enet_private *fep = priv->fep;
> + void __iomem *enet_addr = fep->enet_addr;
> + struct sockaddr *addr = p;
> +
> + if (!is_valid_ether_addr(addr->sa_data))
> + return -EADDRNOTAVAIL;
> + eth_hw_addr_set(dev, addr->sa_data);
> +
> + if (priv->portnum == 2)
> + enet_addr += MCF_ESW_ENET_PORT_OFFSET;
> +
> + writel(dev->dev_addr[3] | (dev->dev_addr[2] << 8) |
> + (dev->dev_addr[1] << 16) | (dev->dev_addr[0] << 24),
> + enet_addr + MCF_FEC_PALR);
> + writel((dev->dev_addr[5] << 16) | (dev->dev_addr[4] << 24),
> + enet_addr + MCF_FEC_PAUR);
> +
> + return mtip_update_atable_static((unsigned char *)dev->dev_addr,
> + 7, 7, fep);
^^^^
If mtip_update_atable_static() returns an error (like -ENOSPC when the
address table is full), the device MAC address has already been updated
by eth_hw_addr_set() and the hardware registers have been written.
This leaves the system with a MAC address in dev->dev_addr and the
hardware registers that isn't in the switch's address table. Should the
MAC address change be rolled back on error, or should the address table
be updated first before committing the MAC change?
> +}
[ ... ]
> @@ -1210,6 +1492,8 @@ static int mtip_ndev_init(struct switch_enet_private *fep,
> goto cleanup_created_ndev;
> }
>
> + INIT_WORK(&priv->tx_timeout_work, mtip_timeout_work);
^^^^
Could there be a race if a TX timeout occurs between register_netdev()
(which happens a few lines above at line 1488) and INIT_WORK() here?
At line 1478, netdev_ops is set to &mtip_netdev_ops which includes
.ndo_tx_timeout = mtip_timeout. Then register_netdev() is called at line
1488, making the device active. If a TX timeout occurs before
INIT_WORK() initializes the work struct, mtip_timeout() would call
schedule_work() on an uninitialized work struct.
Should INIT_WORK() be moved before register_netdev()?
> +
> dev_dbg(&fep->ndev[i]->dev, "%s: MTIP eth L2 switch %pM\n",
> fep->ndev[i]->name, fep->ndev[i]->dev_addr);
> }
© 2016 - 2026 Red Hat, Inc.