This patch provides the mtip_switch_tx and mtip_switch_rx function
code for the MTIP L2 switch.
Signed-off-by: Lukasz Majewski <lukasz.majewski@mailbox.org>
---
Changes for v13:
- New patch - created by excluding some code from large (i.e. v12 and
earlier) MTIP driver
Changes for v14:
- Rewrite RX error handling code
- Remove } else { from if (unlikely(!skb)) { condition in mtip_switch_rx()
- Remove locking from RX patch (done under NAPI API and similar to fec_main.c
driver)
- Use net_prefetch() instead of prefetch()
Changes for v15:
- Use page_address() instead of __va()
- Remove the check that data is not NULL, as it cannot be NULL (those
  buffers are guaranteed to have been allocated earlier in the RX path).
Changes for v16:
- Disable RX interrupt when in switch RX function
- Set offload_fwd_mark when L2 offloading is enabled (fix broadcast flooding)
- Replace spin_{un}lock() with _bh variant
Changes for v17 - v18:
- None
Changes for v19:
- Pass the page with data to upper part of the network stack
- Use new page from page pool for new transfer
- Remove extra copy of the data
Changes for v20:
- Use dev_err_ratelimited() to not spam console
- Replace dev_consume_skb_irq() with dev_consume_skb_any()
- Use skb->dev as the device for the TX packet (avoid assigning
  napi->dev)
- Remove the need to export the port information
- Do not use fep->skb_dirty (calculate proper 'index' instead)
- Use information about stopped queues to determine if the driver can
  accept further packets for TX
Changes for v21 - v23:
- None
---
.../net/ethernet/freescale/mtipsw/mtipl2sw.c | 257 +++++++++++++++++-
1 file changed, 256 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
index 4c64681602d6..61a624f3df2b 100644
--- a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
+++ b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
@@ -246,6 +246,39 @@ struct mtip_port_info *mtip_portinfofifo_read(struct switch_enet_private *fep)
return info;
}
+static void mtip_atable_get_entry_port_number(struct switch_enet_private *fep,
+ unsigned char *mac_addr, u8 *port)
+{
+ int block_index, block_index_end, entry;
+ u32 mac_addr_lo, mac_addr_hi;
+ u32 read_lo, read_hi;
+
+ mac_addr_lo = (u32)((mac_addr[3] << 24) | (mac_addr[2] << 16) |
+ (mac_addr[1] << 8) | mac_addr[0]);
+ mac_addr_hi = (u32)((mac_addr[5] << 8) | (mac_addr[4]));
+
+ block_index = GET_BLOCK_PTR(crc8_calc(mac_addr));
+ block_index_end = block_index + ATABLE_ENTRY_PER_SLOT;
+
+ /* now search all the entries in the selected block */
+ for (entry = block_index; entry < block_index_end; entry++) {
+ mtip_read_atable(fep, entry, &read_lo, &read_hi);
+ *port = MTIP_PORT_FORWARDING_INIT;
+
+ if (read_lo == mac_addr_lo &&
+ ((read_hi & 0x0000FFFF) ==
+ (mac_addr_hi & 0x0000FFFF))) {
+ /* found the correct address */
+ if ((read_hi & (1 << 16)) && (!(read_hi & (1 << 17))))
+ *port = FIELD_GET(AT_PORT_MASK, read_hi);
+ break;
+ }
+ }
+
+ dev_dbg(&fep->pdev->dev, "%s: MAC: %pM PORT: 0x%x\n", __func__,
+ mac_addr, *port);
+}
+
/* Clear complete MAC Look Up Table */
void mtip_clear_atable(struct switch_enet_private *fep)
{
@@ -834,11 +867,233 @@ static irqreturn_t mtip_interrupt(int irq, void *ptr_fep)
static void mtip_switch_tx(struct switch_enet_private *fep)
{
+ struct net_device *dev;
+ unsigned short status;
+ struct sk_buff *skb;
+ struct cbd_t *bdp;
+ int index;
+
+ spin_lock_bh(&fep->hw_lock);
+ bdp = fep->dirty_tx;
+
+ while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) {
+ if (bdp == fep->cur_tx &&
+ !mtip_netif_queues_stopped(fep))
+ break;
+
+ index = bdp - fep->tx_bd_base;
+ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+ MTIP_SWITCH_TX_FRSIZE, DMA_TO_DEVICE);
+ bdp->cbd_bufaddr = 0;
+ skb = fep->tx_skbuff[index];
+ dev = skb->dev;
+ /* Check for errors */
+ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
+ BD_ENET_TX_RL | BD_ENET_TX_UN |
+ BD_ENET_TX_CSL)) {
+ dev->stats.tx_errors++;
+ if (status & BD_ENET_TX_HB) /* No heartbeat */
+ dev->stats.tx_heartbeat_errors++;
+ if (status & BD_ENET_TX_LC) /* Late collision */
+ dev->stats.tx_window_errors++;
+ if (status & BD_ENET_TX_RL) /* Retrans limit */
+ dev->stats.tx_aborted_errors++;
+ if (status & BD_ENET_TX_UN) /* Underrun */
+ dev->stats.tx_fifo_errors++;
+ if (status & BD_ENET_TX_CSL) /* Carrier lost */
+ dev->stats.tx_carrier_errors++;
+ } else {
+ dev->stats.tx_packets++;
+ }
+
+ if (status & BD_ENET_TX_READY)
+ dev_err_ratelimited(&fep->pdev->dev,
+ "xmit interrupt and TX_READY.\n");
+
+ /* Deferred means some collisions occurred during transmit,
+ * but we eventually sent the packet OK.
+ */
+ if (status & BD_ENET_TX_DEF)
+ dev->stats.collisions++;
+
+ /* Free the sk buffer associated with this last transmit */
+ dev_consume_skb_any(skb);
+ fep->tx_skbuff[index] = NULL;
+
+ /* Update pointer to next buffer descriptor to be transmitted */
+ if (status & BD_ENET_TX_WRAP)
+ bdp = fep->tx_bd_base;
+ else
+ bdp++;
+
+ /* Since we have freed up a buffer, the ring is no longer
+ * full.
+ */
+ if (fep->dirty_tx == fep->cur_tx &&
+ mtip_netif_queues_stopped(fep))
+ mtip_netif_wake_queues(fep);
+ }
+ fep->dirty_tx = bdp;
+ spin_unlock_bh(&fep->hw_lock);
+}
+
+static int mtip_update_cbd(struct switch_enet_private *fep, struct cbd_t *bdp,
+ int index)
+{
+ struct page *new_page;
+
+ new_page = page_pool_dev_alloc_pages(fep->page_pool);
+ if (unlikely(!new_page))
+ return -ENOMEM;
+
+ fep->page[index] = new_page;
+ bdp->cbd_bufaddr = page_pool_get_dma_addr(new_page);
+
+ return 0;
}
+/* During a receive, the cur_rx points to the current incoming buffer.
+ * When we update through the ring, if the next incoming buffer has
+ * not been given to the system, we just set the empty indicator,
+ * effectively tossing the packet.
+ */
static int mtip_switch_rx(struct net_device *dev, int budget)
{
- return -ENOMEM;
+ struct mtip_ndev_priv *priv = netdev_priv(dev);
+ u8 *data, rx_port = MTIP_PORT_FORWARDING_INIT;
+ struct switch_enet_private *fep = priv->fep;
+ unsigned short status, pkt_len;
+ struct net_device *pndev;
+ struct ethhdr *eth_hdr;
+ int pkt_received = 0;
+ struct sk_buff *skb;
+ struct cbd_t *bdp;
+ struct page *page;
+ int index;
+
+ /* First, grab all of the stats for the incoming packet.
+ * These get messed up if we get called due to a busy condition.
+ */
+ bdp = fep->cur_rx;
+
+ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
+ if (pkt_received >= budget)
+ break;
+
+ pkt_received++;
+
+ writel(MCF_ESW_IMR_RXF, fep->hwp + ESW_ISR);
+ if (!fep->usage_count)
+ goto rx_processing_done;
+
+ status ^= BD_ENET_RX_LAST;
+ /* Check for errors. */
+ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
+ BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST |
+ BD_ENET_RX_CL)) {
+ dev->stats.rx_errors++;
+ if (status & BD_ENET_RX_OV) {
+ /* FIFO overrun */
+ dev->stats.rx_fifo_errors++;
+ goto rx_processing_done;
+ }
+ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH
+ | BD_ENET_RX_LAST)) {
+ /* Frame too long or too short. */
+ dev->stats.rx_length_errors++;
+ if (status & BD_ENET_RX_LAST)
+ netdev_err(dev, "rcv is not +last\n");
+ }
+ if (status & BD_ENET_RX_CR) /* CRC Error */
+ dev->stats.rx_crc_errors++;
+
+ /* Report late collisions as a frame error. */
+ if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL))
+ dev->stats.rx_frame_errors++;
+ goto rx_processing_done;
+ }
+
+ /* Get correct RX page */
+ index = bdp - fep->rx_bd_base;
+ page = fep->page[index];
+ /* Process the incoming frame */
+ pkt_len = bdp->cbd_datlen;
+
+ dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
+ pkt_len, DMA_FROM_DEVICE);
+ net_prefetch(page_address(page));
+ data = page_address(page);
+
+ if (fep->quirks & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(data, pkt_len);
+
+ eth_hdr = (struct ethhdr *)data;
+ mtip_atable_get_entry_port_number(fep, eth_hdr->h_source,
+ &rx_port);
+ if (rx_port == MTIP_PORT_FORWARDING_INIT)
+ mtip_atable_dynamicms_learn_migration(fep,
+ mtip_get_time(),
+ eth_hdr->h_source,
+ &rx_port);
+
+ if ((rx_port == 1 || rx_port == 2) && fep->ndev[rx_port - 1])
+ pndev = fep->ndev[rx_port - 1];
+ else
+ pndev = dev;
+
+ if (mtip_update_cbd(fep, bdp, index)) {
+ pndev->stats.rx_dropped++;
+ goto rx_processing_done;
+ }
+
+ /* The packet length includes FCS, but we don't want to
+ * include that when passing upstream as it messes up
+ * bridging applications.
+ */
+ skb = build_skb(page_address(page), PAGE_SIZE);
+ if (unlikely(!skb)) {
+ page_pool_recycle_direct(fep->page_pool, page);
+ pndev->stats.rx_dropped++;
+
+ netdev_err_once(pndev, "build_skb failed!\n");
+ goto rx_processing_done;
+ }
+
+ skb_put(skb, pkt_len); /* Make room */
+ skb_mark_for_recycle(skb);
+ skb->protocol = eth_type_trans(skb, pndev);
+ skb->offload_fwd_mark = fep->br_offload;
+ napi_gro_receive(&fep->napi, skb);
+
+ pndev->stats.rx_packets++;
+ pndev->stats.rx_bytes += pkt_len;
+
+ rx_processing_done:
+ /* Clear the status flags for this buffer */
+ status &= ~BD_ENET_RX_STATS;
+
+ /* Mark the buffer empty */
+ status |= BD_ENET_RX_EMPTY;
+ /* Make sure that updates to the descriptor are performed */
+ wmb();
+ bdp->cbd_sc = status;
+
+ /* Update BD pointer to next entry */
+ if (status & BD_ENET_RX_WRAP)
+ bdp = fep->rx_bd_base;
+ else
+ bdp++;
+
+ /* Doing this here will keep the FEC running while we process
+ * incoming frames. On a heavily loaded network, we should be
+ * able to keep up at the expense of system resources.
+ */
+ writel(MCF_ESW_RDAR_R_DES_ACTIVE, fep->hwp + ESW_RDAR);
+ } /* while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) */
+
+ fep->cur_rx = bdp;
+
+ return pkt_received;
}
static void mtip_adjust_link(struct net_device *dev)
--
2.39.5
This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: mtip: Add mtip_switch_{rx|tx} functions to the L2 switch driver
This patch provides the mtip_switch_tx and mtip_switch_rx function code
for the MTIP L2 switch.
> diff --git a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
> index 4c64681602d6..61a624f3df2b 100644
> --- a/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
> +++ b/drivers/net/ethernet/freescale/mtipsw/mtipl2sw.c
> @@ -246,6 +246,39 @@ struct mtip_port_info *mtip_portinfofifo_read(struct switch_enet_private *fep)
> return info;
> }
>
> +static void mtip_atable_get_entry_port_number(struct switch_enet_private *fep,
> + unsigned char *mac_addr, u8 *port)
> +{
> + int block_index, block_index_end, entry;
> + u32 mac_addr_lo, mac_addr_hi;
> + u32 read_lo, read_hi;
> +
> + mac_addr_lo = (u32)((mac_addr[3] << 24) | (mac_addr[2] << 16) |
> + (mac_addr[1] << 8) | mac_addr[0]);
> + mac_addr_hi = (u32)((mac_addr[5] << 8) | (mac_addr[4]));
> +
> + block_index = GET_BLOCK_PTR(crc8_calc(mac_addr));
> + block_index_end = block_index + ATABLE_ENTRY_PER_SLOT;
> +
> + /* now search all the entries in the selected block */
> + for (entry = block_index; entry < block_index_end; entry++) {
> + mtip_read_atable(fep, entry, &read_lo, &read_hi);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Can this create a torn read of the address table entry?
mtip_read_atable() performs two separate readl() operations to read a
64-bit address table entry:
*read_lo = readl(&atable_base[index].lo);
*read_hi = readl(&atable_base[index].hi);
If another thread holding fep->learn_lock updates the same entry between
these two reads, the lookup would see an inconsistent state (old low word
with new high word, or vice versa).
Looking at the call chain:
Thread 1 (no lock):
mtip_switch_rx()->mtip_atable_get_entry_port_number()->mtip_read_atable()
Thread 2 (lock held):
mtip_mgnt_timer()->mtip_atable_dynamicms_learn_migration()->
mtip_update_atable_dynamic1()->mtip_write_atable()
The race could cause a MAC address lookup to fail or match the wrong entry,
delivering packets to the wrong port.
All other functions that read or write the address table acquire
fep->learn_lock before calling mtip_read_atable() or mtip_write_atable().
Should this function also hold the lock during the lookup loop?
> + *port = MTIP_PORT_FORWARDING_INIT;
> +
> + if (read_lo == mac_addr_lo &&
> + ((read_hi & 0x0000FFFF) ==
> + (mac_addr_hi & 0x0000FFFF))) {
> + /* found the correct address */
> + if ((read_hi & (1 << 16)) && (!(read_hi & (1 << 17))))
> + *port = FIELD_GET(AT_PORT_MASK, read_hi);
> + break;
> + }
> + }
> +
> + dev_dbg(&fep->pdev->dev, "%s: MAC: %pM PORT: 0x%x\n", __func__,
> + mac_addr, *port);
> +}
[ ... ]
© 2016 - 2026 Red Hat, Inc.