[PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch

Tristram.Ha@microchip.com posted 2 patches 1 year ago
There is a newer version of this series
[PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Tristram.Ha@microchip.com 1 year ago
From: Tristram Ha <tristram.ha@microchip.com>

It was recommended to use this XPCS driver for Microchip KSZ9477 switch
to operate its SGMII port as the SGMII implementation uses the same
Synopsys DesignWare IP, but current code does not work in some cases.
The only solution is to add a quirk field and use that to operate KSZ
specific code.

For 1000BaseX mode setting neg_mode to false works, but that does not
work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
1000BaseX mode to work with auto-negotiation enabled.

However SGMII mode in KSZ9477 has a bug in which the current speed
needs to be set in MII_BMCR to pass traffic.  The current driver code
does not do anything when link is up with auto-negotiation enabled, so
that code needs to be changed for KSZ9477.

Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
---
 drivers/net/pcs/pcs-xpcs.c   | 52 ++++++++++++++++++++++++++++++++++--
 drivers/net/pcs/pcs-xpcs.h   |  2 ++
 include/linux/pcs/pcs-xpcs.h |  6 +++++
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 1faa37f0e7b9..ddf6cd4b37a7 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -768,6 +768,14 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs,
 		val |= DW_VR_MII_AN_INTR_EN;
 	}
 
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) {
+		mask |= DW_VR_MII_SGMII_LINK_STS | DW_VR_MII_TX_CONFIG_MASK;
+		val |= FIELD_PREP(DW_VR_MII_TX_CONFIG_MASK,
+				  DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII);
+		val |= DW_VR_MII_SGMII_LINK_STS;
+	}
+
 	ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, mask, val);
 	if (ret < 0)
 		return ret;
@@ -982,6 +990,15 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
 	if (ret < 0)
 		return ret;
 
+	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change in SGMII
+	 * mode, so needs to be cleared.  It can appear just by itself, which
+	 * does not mean the link is up.
+	 */
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    (ret & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)) {
+		ret &= ~DW_VR_MII_AN_STS_C37_ANCMPLT_INTR;
+		xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0);
+	}
 	if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
 		int speed_value;
 
@@ -1037,6 +1054,18 @@ static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs,
 {
 	int lpa, bmsr;
 
+	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change, so needs
+	 * to be cleared.  If polling is not used then it is cleared by
+	 * following code.
+	 */
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ && xpcs->pcs.poll) {
+		int val;
+
+		val = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+		if (val & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)
+			xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS,
+				   0);
+	}
 	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
 			      state->advertising)) {
 		/* Reset link state */
@@ -1138,9 +1167,14 @@ static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
 					 phy_interface_t interface,
 					 int speed, int duplex)
 {
+	u16 val = 0;
 	int ret;
 
-	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+	/* Microchip KSZ SGMII implementation has a bug that needs MII_BMCR
+	 * register to be updated with current speed to pass traffic.
+	 */
+	if (xpcs->quirk != DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
 		return;
 
 	if (interface == PHY_INTERFACE_MODE_1000BASEX) {
@@ -1155,10 +1189,18 @@ static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
 			dev_err(&xpcs->mdiodev->dev,
 				"%s: half duplex not supported\n",
 				__func__);
+
+		/* No need to update MII_BMCR register if not in SGMII mode. */
+		if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+		    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+			return;
 	}
 
+	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
+	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+		val = BMCR_ANENABLE;
 	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR,
-			 mii_bmcr_encode_fixed(speed, duplex));
+			 val | mii_bmcr_encode_fixed(speed, duplex));
 	if (ret)
 		dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n",
 			__func__, ERR_PTR(ret));
@@ -1563,5 +1605,11 @@ void xpcs_destroy_pcs(struct phylink_pcs *pcs)
 }
 EXPORT_SYMBOL_GPL(xpcs_destroy_pcs);
 
+void xpcs_set_quirk(struct dw_xpcs *xpcs, int quirk)
+{
+	xpcs->quirk = quirk;
+}
+EXPORT_SYMBOL_GPL(xpcs_set_quirk);
+
 MODULE_DESCRIPTION("Synopsys DesignWare XPCS library");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index adc5a0b3c883..1348a9a05ca6 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -73,6 +73,7 @@
 
 /* VR_MII_AN_CTRL */
 #define DW_VR_MII_AN_CTRL_8BIT			BIT(8)
+#define DW_VR_MII_SGMII_LINK_STS		BIT(4)
 #define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
 #define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII	0x1
 #define DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII	0x0
@@ -122,6 +123,7 @@ struct dw_xpcs {
 	struct phylink_pcs pcs;
 	phy_interface_t interface;
 	bool need_reset;
+	int quirk;
 };
 
 int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 733f4ddd2ef1..7fccbff2383d 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -41,6 +41,10 @@ enum dw_xpcs_pma_id {
 	WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80,
 };
 
+enum dw_xpcs_quirks {
+	DW_XPCS_QUIRK_MICROCHIP_KSZ = 1,
+};
+
 struct dw_xpcs_info {
 	u32 pcs;
 	u32 pma;
@@ -59,4 +63,6 @@ void xpcs_destroy(struct dw_xpcs *xpcs);
 struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr);
 void xpcs_destroy_pcs(struct phylink_pcs *pcs);
 
+void xpcs_set_quirk(struct dw_xpcs *xpcs, int quirk);
+
 #endif /* __LINUX_PCS_XPCS_H */
-- 
2.34.1
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Vladimir Oltean 1 year ago
On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> However SGMII mode in KSZ9477 has a bug in which the current speed
> needs to be set in MII_BMCR to pass traffic.  The current driver code
> does not do anything when link is up with auto-negotiation enabled, so
> that code needs to be changed for KSZ9477.

Does this claimed SGMII bug have an erratum number like Microchip usually
assign for something this serious? Is it something we can look up?
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Andrew Lunn 1 year ago
> For 1000BaseX mode setting neg_mode to false works, but that does not
> work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> 1000BaseX mode to work with auto-negotiation enabled.

Unless you have the datasheet, writing 0x18 to 0x1f8001 is pretty
meaningless. You need to explain what these two bits mean in this
register.

	Andrew
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Russell King (Oracle) 1 year ago
On Tue, Jan 28, 2025 at 02:16:28PM +0100, Andrew Lunn wrote:
> > For 1000BaseX mode setting neg_mode to false works, but that does not
> > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > 1000BaseX mode to work with auto-negotiation enabled.
> 
> Unless you have the datasheet, writing 0x18 to 0x1f8001 is pretty
> meaningless. You need to explain what these two bits mean in this
> register.

Well, this is the reason I searched for the datasheet to find out
what it was, and discovered further information which suggests
other stuff is wrong in the current driver.

bit 4: SGMII link status. This is used to populate the SGMII tx_config
register bit 15 when XPCS is operating in PHY mode. KSZ9477
documentation states that this bit must be set in "SerDes" mode, aka
1000base-X. If that requirement comes from Synopsys, then the current
XPCS driver is buggy...

bit 3: Transmit configuration. In SGMII mode, selects between PHY mode
(=1) or MAC mode (=0). KSZ9477 documentation states that this bit must
be set when operating in "SerDes" mode. (Same concern as for bit 4.)

I will also note here that bits 2:1 are documented as 00=SerDes mode,
10=SGMII mode.

Cross-referencing with the SJA1105 documentation, Digital Control
Register 1 bit 0 = 0 gives a tx_config format of:

	tx_config[15] = comes from SGMII link status above
	tx_config[12] = MII_ADVERTISE.bit5 (which, even though operating
			in SGMII mode, MII_ADVERTISE is 802.3z format.)
	tx_config[11:10] = MII_BMCR speed bits

As stated elsewhere, changes to the AN control register in KSZ9477
are documented as only taking effect when the MII_ADVERTISE register
is subsequently written (which the driver doesn't do, nor does this
patch!)

The lack of access to Synopsys Designware XPCS documentation makes
working out how to properly drive this hardware problematical. We're
subject to the randomness of the register set documentation that can
be found in various chip manufacturers who publish it.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Russell King (Oracle) 1 year ago
On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> For 1000BaseX mode setting neg_mode to false works, but that does not
> work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> 1000BaseX mode to work with auto-negotiation enabled.

I'm not sure (a) exactly what the above paragraph is trying to tell me,
and (b) why setting the AN control register to 0x18, which should only
affect SGMII, has an effect on 1000BASE-X.

Note that a config word formatted for SGMII can result in a link with
1000BASE-X to come up, but it is not correct. So, I highly recommend you
check the config word sent by the XPCS to the other end of the link.
Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
formatted.

> However SGMII mode in KSZ9477 has a bug in which the current speed
> needs to be set in MII_BMCR to pass traffic.  The current driver code
> does not do anything when link is up with auto-negotiation enabled, so
> that code needs to be changed for KSZ9477.
> 
> Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
> ---
>  drivers/net/pcs/pcs-xpcs.c   | 52 ++++++++++++++++++++++++++++++++++--
>  drivers/net/pcs/pcs-xpcs.h   |  2 ++
>  include/linux/pcs/pcs-xpcs.h |  6 +++++
>  3 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
> index 1faa37f0e7b9..ddf6cd4b37a7 100644
> --- a/drivers/net/pcs/pcs-xpcs.c
> +++ b/drivers/net/pcs/pcs-xpcs.c
> @@ -768,6 +768,14 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs,
>  		val |= DW_VR_MII_AN_INTR_EN;
>  	}
>  
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) {
> +		mask |= DW_VR_MII_SGMII_LINK_STS | DW_VR_MII_TX_CONFIG_MASK;
> +		val |= FIELD_PREP(DW_VR_MII_TX_CONFIG_MASK,
> +				  DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII);
> +		val |= DW_VR_MII_SGMII_LINK_STS;
> +	}
> +
>  	ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, mask, val);
>  	if (ret < 0)
>  		return ret;
> @@ -982,6 +990,15 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
>  	if (ret < 0)
>  		return ret;
>  
> +	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change in SGMII
> +	 * mode, so needs to be cleared.  It can appear just by itself, which
> +	 * does not mean the link is up.
> +	 */
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +	    (ret & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)) {
> +		ret &= ~DW_VR_MII_AN_STS_C37_ANCMPLT_INTR;
> +		xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0);
> +	}

*_get_state() is not an interrupt acknowledgement function. It isn't
_necessarily_ called when an interrupt has happened, and it will be
called "sometime after" the interrupt has been handled as it's called
from an entirely separate workqueue.

Would it be better to just ignore the block following:

	} else if (ret == DW_VR_MII_AN_STS_C37_ANCMPLT_INTR) {

instead? I'm not sure that block of code/if statement was correct,
and was added in "net: pcs: xpcs: adapt Wangxun NICs for SGMII mode".

>  	if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
>  		int speed_value;
>  
> @@ -1037,6 +1054,18 @@ static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs,
>  {
>  	int lpa, bmsr;
>  
> +	/* DW_VR_MII_AN_STS_C37_ANCMPLT_INTR just means link change, so needs
> +	 * to be cleared.  If polling is not used then it is cleared by
> +	 * following code.
> +	 */
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ && xpcs->pcs.poll) {
> +		int val;
> +
> +		val = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
> +		if (val & DW_VR_MII_AN_STS_C37_ANCMPLT_INTR)
> +			xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS,
> +				   0);
> +	}

Same concerns.

>  	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
>  			      state->advertising)) {
>  		/* Reset link state */
> @@ -1138,9 +1167,14 @@ static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
>  					 phy_interface_t interface,
>  					 int speed, int duplex)
>  {
> +	u16 val = 0;
>  	int ret;
>  
> -	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +	/* Microchip KSZ SGMII implementation has a bug that needs MII_BMCR
> +	 * register to be updated with current speed to pass traffic.
> +	 */
> +	if (xpcs->quirk != DW_XPCS_QUIRK_MICROCHIP_KSZ &&

	if (!(xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
	      interface != PHY_INTERFACE_MODE_SGMII) &&

> +	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
>  		return;
>  
>  	if (interface == PHY_INTERFACE_MODE_1000BASEX) {
> @@ -1155,10 +1189,18 @@ static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs,
>  			dev_err(&xpcs->mdiodev->dev,
>  				"%s: half duplex not supported\n",
>  				__func__);
> +
> +		/* No need to update MII_BMCR register if not in SGMII mode. */
> +		if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +		    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +			return;

then you don't need this.

>  	}
>  
> +	if (xpcs->quirk == DW_XPCS_QUIRK_MICROCHIP_KSZ &&
> +	    neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +		val = BMCR_ANENABLE;
>  	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR,
> -			 mii_bmcr_encode_fixed(speed, duplex));
> +			 val | mii_bmcr_encode_fixed(speed, duplex));

I think in this case, I'd prefer this to just modify the speed and
duplex bits rather than writing the whole register.

>  	if (ret)
>  		dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n",
>  			__func__, ERR_PTR(ret));
> @@ -1563,5 +1605,11 @@ void xpcs_destroy_pcs(struct phylink_pcs *pcs)
>  }
>  EXPORT_SYMBOL_GPL(xpcs_destroy_pcs);
>  
> +void xpcs_set_quirk(struct dw_xpcs *xpcs, int quirk)
> +{
> +	xpcs->quirk = quirk;
> +}
> +EXPORT_SYMBOL_GPL(xpcs_set_quirk);

According to the KSZ9477 data, the physid is 0x7996ced0 (which is the
DW value according to the xpcs header file). We also read the PMA ID
(xpcs->info.pma). Can this be used to identify the KSZ9477 without
introducing quirks?

I would prefer to avoid going down the route of introducing a quirk
mask - that seems to be a recipe to breed lots of flags that make
long term maintenance more difficult.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Vladimir Oltean 1 year ago
On Tue, Jan 28, 2025 at 09:24:45AM +0000, Russell King (Oracle) wrote:
> On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > For 1000BaseX mode setting neg_mode to false works, but that does not
> > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > 1000BaseX mode to work with auto-negotiation enabled.
> 
> I'm not sure (a) exactly what the above paragraph is trying to tell me,
> and (b) why setting the AN control register to 0x18, which should only
> affect SGMII, has an effect on 1000BASE-X.
> 
> Note that a config word formatted for SGMII can result in a link with
> 1000BASE-X to come up, but it is not correct. So, I highly recommend you
> check the config word sent by the XPCS to the other end of the link.
> Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
> formatted.

I, too, am concerned about the sentence "setting neg_mode to false works".
If this is talking about the only neg_mode field that is a boolean, aka
struct phylink_pcs :: neg_mode, then setting it to false is not
something driver customizable, it should be true for API compliance,
and all that remains false in current kernel trees will eventually get
converted to true, AFAIU. If 1000BaseX works by setting xpcs->pcs.neg_mode
to false and not modifying anything else, it should be purely a
coincidence that it "works", since that makes the driver comparisons
with PHYLINK_PCS_NEG_* constants meaningless.

> According to the KSZ9477 data, the physid is 0x7996ced0 (which is the
> DW value according to the xpcs header file). We also read the PMA ID
> (xpcs->info.pma). Can this be used to identify the KSZ9477 without
> introducing quirks?

If nothing else works, and it turns out that different IP integrations
report the same value in ID registers but need different handling, then
in principle the hack approach is also on the table. SJA1105, whose
hardware reads zeroes for the ID registers, reports a fake and unique ID
for the XPCS to identify it, because it, like the KSZ9477 driver, is in
control of the MDIO read operations and can selectively manipulate their
result.
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Russell King (Oracle) 1 year ago
On Tue, Jan 28, 2025 at 12:21:28PM +0200, Vladimir Oltean wrote:
> On Tue, Jan 28, 2025 at 09:24:45AM +0000, Russell King (Oracle) wrote:
> > On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > > For 1000BaseX mode setting neg_mode to false works, but that does not
> > > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > > 1000BaseX mode to work with auto-negotiation enabled.
> > 
> > I'm not sure (a) exactly what the above paragraph is trying to tell me,
> > and (b) why setting the AN control register to 0x18, which should only
> > affect SGMII, has an effect on 1000BASE-X.
> > 
> > Note that a config word formatted for SGMII can result in a link with
> > 1000BASE-X to come up, but it is not correct. So, I highly recommend you
> > check the config word sent by the XPCS to the other end of the link.
> > Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
> > formatted.
> 
> I, too, am concerned about the sentence "setting neg_mode to false works".
> If this is talking about the only neg_mode field that is a boolean, aka
> struct phylink_pcs :: neg_mode, then setting it to false is not
> something driver customizable, it should be true for API compliance,
> and all that remains false in current kernel trees will eventually get
> converted to true, AFAIU. If 1000BaseX works by setting xpcs->pcs.neg_mode
> to false and not modifying anything else, it should be purely a
> coincidence that it "works", since that makes the driver comparisons
> with PHYLINK_PCS_NEG_* constants meaningless.

Another related issue that concerns me is:

         * Note: Since it is MAC side SGMII, there is no need to set
         *       SR_MII_AN_ADV. MAC side SGMII receives AN Tx Config from
         *       PHY about the link state change after C28 AN is completed
         *       between PHY and Link Partner. There is also no need to
         *       trigger AN restart for MAC-side SGMII.

However, 2a22b7ae2fa3 ("net: pcs: xpcs: adapt Wangxun NICs for SGMII
mode") added support for switching this to PHY-side SGMII, so this
comment is no longer true.

Again, I still wonder whether PHY-side is some kind of hack despite
my queries during the review of that change. Sadly, I didn't catch
that comment (and until recently, I didn't have any documentation
on these registers. I now have the KS9477 and SJA1105 manuals that
document some of the register set.)

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Russell King (Oracle) 1 year ago
On Tue, Jan 28, 2025 at 12:21:28PM +0200, Vladimir Oltean wrote:
> On Tue, Jan 28, 2025 at 09:24:45AM +0000, Russell King (Oracle) wrote:
> > On Mon, Jan 27, 2025 at 07:32:25PM -0800, Tristram.Ha@microchip.com wrote:
> > > For 1000BaseX mode setting neg_mode to false works, but that does not
> > > work for SGMII mode.  Setting 0x18 value in register 0x1f8001 allows
> > > 1000BaseX mode to work with auto-negotiation enabled.
> > 
> > I'm not sure (a) exactly what the above paragraph is trying to tell me,
> > and (b) why setting the AN control register to 0x18, which should only
> > affect SGMII, has an effect on 1000BASE-X.
> > 
> > Note that a config word formatted for SGMII can result in a link with
> > 1000BASE-X to come up, but it is not correct. So, I highly recommend you
> > check the config word sent by the XPCS to the other end of the link.
> > Bit 0 of that will tell you whether it is SGMII-formatted or 802.3z
> > formatted.
> 
> I, too, am concerned about the sentence "setting neg_mode to false works".
> If this is talking about the only neg_mode field that is a boolean, aka
> struct phylink_pcs :: neg_mode, then setting it to false is not
> something driver customizable, it should be true for API compliance,
> and all that remains false in current kernel trees will eventually get
> converted to true, AFAIU. If 1000BaseX works by setting xpcs->pcs.neg_mode
> to false and not modifying anything else, it should be purely a
> coincidence that it "works", since that makes the driver comparisons
> with PHYLINK_PCS_NEG_* constants meaningless.
> 
> > According to the KSZ9477 data, the physid is 0x7996ced0 (which is the
> > DW value according to the xpcs header file). We also read the PMA ID
> > (xpcs->info.pma). Can this be used to identify the KSZ9477 without
> > introducing quirks?
> 
> If nothing else works, and it turns out that different IP integrations
> report the same value in ID registers but need different handling, then
> in principle the hack approach is also on the table. SJA1105, whose
> hardware reads zeroes for the ID registers, reports a fake and unique ID
> for the XPCS to identify it, because it, like the KSZ9477 driver, is in
> control of the MDIO read operations and can selectively manipulate their
> result.

Further review of the KS9477 documentation finds this:

5.5.9 SGMII AUTO-NEGOTIATION CONTROL REGISTER

"After making changes to this register, the changes don’t take effect
until SGMII Auto-Negotiation Advertisement Register is written."

In xpcs_config_aneg_c37_sgmii() we have:

        ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, mask, val);
        if (ret < 0)
                return ret;

However, MII_ADVERTISE in MDIO_MMD_VEND2 is not written by this
function. If the documentation is correct, then this has no effect
on KS9477, and could be part of the problem.

I notice the SJA1105 doesn't make any similar statement, so I wonder
what the original Synopsys documentation says about the AN control
register.

Note that xpcs_config_aneg_c37_1000basex() does also write this
register, but it is followed by a write to the MII_ADVERTISE
register.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Vladimir Oltean 1 year ago
On Tue, Jan 28, 2025 at 12:33:11PM +0000, Russell King (Oracle) wrote:
> I wonder what the original Synopsys documentation says about the AN
> control register.

My private XPCS databook 2017-12.pdf, the applicability of which I cannot
vouch for, has a section with programming guidelines for SGMII
Auto-Negotiation. I quote:

| Clause 37 auto-negotiation can be performed in the SGMII mode by
| programming various registers as follows:
| 
| 1. In configurations with both 10G and 1G mode speed mode, switch
|    DWC_xpcs to 1G speed mode by following the steps described in
|    "Switching to 1G or KX Mode and 10 or 100 Mbps SGMII Speed".
| 
| 2. In Backplane Ethernet PCS configurations, program bit[12] (AN_EN) of
|    SR_AN_CTRL Register to 0 and bit[12] (CL37_BP) of VR_XS_PCS_DIG_CTRL1
|    Register to 1.
| 
| 3. Disable Clause 37 auto-negotiation by programming bit [12]
|    (AN_ENABLE) of SR_MII_CTRL Register to 0 (in case it is already
|    enabled).
| 
| 4. Program various fields of VR_MII_AN_CTRL Register appropriately as
|    follows:
|    - Program PCS_MODE to 2’b10
|    - Program TX_CONFIG to 1 (PHY side SGMII) or 0 (MAC side SGMII) based
|      on your requirement
|    - Program MII_AN_INTR_EN to 1, to enable auto-negotiation complete
|      interrupt
|    - If TX_CONFIG is set to 1 and bit[0] of VR_MII_DIG_CTRL1 Register is
|      set to 0, program SGMII_LINK_STS to indicate the link status to the
|      MAC side SGMII.
|    - Program MII_CTRL to 0 or 1, as per your requirement.
| 
| 5. If DWC_xpcs is configured as PHY-side SGMII in the above step, you
|    can program bit [0] of VR_MII_DIG_CTRL1 Register to 1, if you wish to
|    use the values of the xpcs_sgmii_link_sts_i input ports.
|    xpcs_sgmii_full_duplex_i and xpcs_sgmii_link_speed_i as the
|    transmitted configuration word.
| 
| 6. If DWC_xpcs is configured as PHY-side SGMII and if bit[0] of
|    VR_MII_DIG_CTRL1 Register is set to 0,
|    - Program SS13 and SS6 bits of SR_MII_CTRL Register to the required
|      SGMII Speed
|    - Program bit [5] (FD) of SR_MII_AN_ADV to the desired mode. This
|      step is mandatory even if you wish to leave the FD register bit to
|      its default value.
| 
| 7. If DWC_xpcs is configured as MAC-side SGMII in step 4, program bit[9]
|    of VR_MII_DIG_CTRL1 Register to 1, for DWC_xpcs to automatically
|    switch to the negotiated link-speed, after the completion of
|    auto-negotiation.
| 
| 8. Enable CL37 Auto-negotiation, by programming bit[12] of the
|    SR_MII_CTRL Register to 1.

In my reading of these steps, writing to DW_VR_MII_AN_CTRL does not
depend on a subsequent write to SR_MII_AN_ADV to take effect.
But there is this additional note in the databook:

| If TX_CONFIG=1 and Bit[0] (SGMII_PHY_MODE_CTRL) of VR_MII_DIG_CTRL1 = 0,
| program the SR_MII_AN_ADV only after programming 'SGMII_LINK_STS' bit
| (of VR_MII_AN_CTRL) and SS13 and SS6 bits (of SR_MII_CTRL)

So my understanding is that SR_MII_AN_ADV needs to be written only if
TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]). That's quite
different, and that will make sense when you consider that there's also
a table with the places the autoneg code word gets its info from:

Config_Reg Bits in the 1000BASE-X and SGMII Mode

 +----------------+-------------------+--------------------+--------------------------------------------+
 | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value                           |
 |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 15             | Next page support | 0                  | Link up or down.                           |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
 |                |                   |                    | this bit is derived from Bit 4             |
 |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
 |                |                   |                    | this bit is derived from the input port    |
 |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 14             | ACK               | 1                  | 1                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 13             | RF[1]             | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
 |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
 |                |                   |                    | the SR_MII_AN_ADV.                         |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
 |                |                   |                    | this bit is derived from the input port    |
 |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 11:10          | Reserved          | 0                  | SPEED                                      |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
 |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
 |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
 |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
 |                |                   |                    | this bit is derived from the input port    |
 |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 9              | Reserved          | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 6              | HALF_DUPLEX       | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 5              | FULL_DUPLEX       | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 4:1            | Reserved          | 0                  | 0                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+
 | 0              | Reserved          | 1                  | 1                                          |
 +----------------+-------------------+--------------------+--------------------------------------------+

I haven't figured out either what might be going on with the KSZ9477
integration, I just made a quick refresher and I thought this might be
useful for you as well, Russell. I do notice Tristram does force
TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
what's truly behind that. Hopefully just a misunderstanding.

Tristram, why do you set this field to 1? Linux only supports the
configuration where a MAC behaves like a MAC. There needs to be an
entire discussion if you want to configure a MAC to be a SGMII autoneg
master (like a PHY), how to model that.

Could you confirm that the requirement to program the SGMII
Auto-Negotiation Advertisement Register only exists if the Transmit
Configuration Master field is programmed to 1, like you are doing?
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Russell King (Oracle) 1 year ago
On Tue, Jan 28, 2025 at 05:23:24PM +0200, Vladimir Oltean wrote:
> On Tue, Jan 28, 2025 at 12:33:11PM +0000, Russell King (Oracle) wrote:
> > I wonder what the original Synopsys documentation says about the AN
> > control register.
> 
> My private XPCS databook 2017-12.pdf, the applicability of which I cannot
> vouch for, has a section with programming guidelines for SGMII
> Auto-Negotiation. I quote:

Thanks for this, most useful.

> | Clause 37 auto-negotiation can be performed in the SGMII mode by
> | programming various registers as follows:
> | 
> | 1. In configurations with both 10G and 1G mode speed mode, switch
> |    DWC_xpcs to 1G speed mode by following the steps described in
> |    "Switching to 1G or KX Mode and 10 or 100 Mbps SGMII Speed".
> | 
> | 2. In Backplane Ethernet PCS configurations, program bit[12] (AN_EN) of
> |    SR_AN_CTRL Register to 0 and bit[12] (CL37_BP) of VR_XS_PCS_DIG_CTRL1
> |    Register to 1.
> | 
> | 3. Disable Clause 37 auto-negotiation by programming bit [12]
> |    (AN_ENABLE) of SR_MII_CTRL Register to 0 (in case it is already
> |    enabled).
> | 
> | 4. Program various fields of VR_MII_AN_CTRL Register appropriately as
> |    follows:
> |    - Program PCS_MODE to 2’b10
> |    - Program TX_CONFIG to 1 (PHY side SGMII) or 0 (MAC side SGMII) based
> |      on your requirement
> |    - Program MII_AN_INTR_EN to 1, to enable auto-negotiation complete
> |      interrupt
> |    - If TX_CONFIG is set to 1 and bit[0] of VR_MII_DIG_CTRL1 Register is
> |      set to 0, program SGMII_LINK_STS to indicate the link status to the
> |      MAC side SGMII.
> |    - Program MII_CTRL to 0 or 1, as per your requirement.
> | 
> | 5. If DWC_xpcs is configured as PHY-side SGMII in the above step, you
> |    can program bit [0] of VR_MII_DIG_CTRL1 Register to 1, if you wish to
> |    use the values of the xpcs_sgmii_link_sts_i input ports.
> |    xpcs_sgmii_full_duplex_i and xpcs_sgmii_link_speed_i as the
> |    transmitted configuration word.
> | 
> | 6. If DWC_xpcs is configured as PHY-side SGMII and if bit[0] of
> |    VR_MII_DIG_CTRL1 Register is set to 0,
> |    - Program SS13 and SS6 bits of SR_MII_CTRL Register to the required
> |      SGMII Speed
> |    - Program bit [5] (FD) of SR_MII_AN_ADV to the desired mode. This
> |      step is mandatory even if you wish to leave the FD register bit to
> |      its default value.

I suspect this is where the requirement comes from in the KSZ9477
documentation - and it's been "translated" badly. I note that in the
KSZ9477 documentation, bit[0] of VR_MII_DIG_CTRL1 is marked as
"reserved" and takes the value zero, so in the case of PHY-side SGMII,
(6) always applies to KSZ9477 and (5) never does.

This also solves my concern about 2a22b7ae2fa3 ("net: pcs: xpcs: adapt
Wangxun NICs for SGMII mode"), because there (5) would apply.

> | 
> | 7. If DWC_xpcs is configured as MAC-side SGMII in step 4, program bit[9]
> |    of VR_MII_DIG_CTRL1 Register to 1, for DWC_xpcs to automatically
> |    switch to the negotiated link-speed, after the completion of
> |    auto-negotiation.
> | 
> | 8. Enable CL37 Auto-negotiation, by programming bit[12] of the
> |    SR_MII_CTRL Register to 1.
> 
> In my reading of these steps, writing to DW_VR_MII_AN_CTRL does not
> depend on a subsequent write to SR_MII_AN_ADV to take effect.
> But there is this additional note in the databook:
> 
> | If TX_CONFIG=1 and Bit[0] (SGMII_PHY_MODE_CTRL) of VR_MII_DIG_CTRL1 = 0,
> | program the SR_MII_AN_ADV only after programming 'SGMII_LINK_STS' bit
> | (of VR_MII_AN_CTRL) and SS13 and SS6 bits (of SR_MII_CTRL)
> 
> So my understanding is that SR_MII_AN_ADV needs to be written only if
> TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]).

Yes, agreed. Thankfully, SJA1105 sets PHY_MODE/TX_CONFIG=0 and leaves
SGMII_PHY_MODE_CTRL unaltered. TXGBE sets TX_CONFIG=1 but sets
SGMII_PHY_MODE_CTRL=1 which also avoids this requirement.

> That's quite
> different, and that will make sense when you consider that there's also
> a table with the places the autoneg code word gets its info from:
> 
> Config_Reg Bits in the 1000BASE-X and SGMII Mode
> 
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value                           |
>  |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 15             | Next page support | 0                  | Link up or down.                           |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
>  |                |                   |                    | this bit is derived from Bit 4             |
>  |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 14             | ACK               | 1                  | 1                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 13             | RF[1]             | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
>  |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
>  |                |                   |                    | the SR_MII_AN_ADV.                         |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 11:10          | Reserved          | 0                  | SPEED                                      |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
>  |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
>  |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
>  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
>  |                |                   |                    | this bit is derived from the input port    |
>  |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 9              | Reserved          | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 6              | HALF_DUPLEX       | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 5              | FULL_DUPLEX       | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 4:1            | Reserved          | 0                  | 0                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
>  | 0              | Reserved          | 1                  | 1                                          |
>  +----------------+-------------------+--------------------+--------------------------------------------+
> 
> I haven't figured out either what might be going on with the KSZ9477
> integration, I just made a quick refresher and I thought this might be
> useful for you as well, Russell. I do notice Tristram does force
> TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
> what's truly behind that. Hopefully just a misunderstanding.

If you want to peek at the KSZ9477 documentation, what I'm looking at is
available from here:
https://www.microchip.com/en-us/product/ksz9477#Documentation

Interestingly, there are a number of errata:

Module 7 - SGMII auto-negotiation does not set bit 0 in the
auto-negotiation code word
Basically requires MII_ADVERTISE to be written as 0x1a0, and is only
needed after power-up or reset.

Module 8 - SGMII port link details from the connected SGMII PHY are not
passed properly to the port 7 GMAC
Basically, AUTONEG_INTR_STATUS needs to be read, and the PCS
manually programmed for the speed.

Module 15 - SGMII registers are not initialized by hardware reset
Requires that bit 15 of BASIC_CONTROL is set to reset the registers.

All three are not scheduled to be fixed, apparently.

There seems to be no information listed there regarding the requirement
for SGMII PHY mode.

> Tristram, why do you set this field to 1? Linux only supports the
> configuration where a MAC behaves like a MAC. There needs to be an
> entire discussion if you want to configure a MAC to be a SGMII autoneg
> master (like a PHY), how to model that.

(Using SJA1105 register terminology...)

Looking at the patch, Tristram is setting PHY_MODE=1 and SGMII_LINK=1
not when configuring for SGMII, but when configuring for 1000base-X.

This is reflected in the documentation for KSZ9477 - which states that
both these bits need to be set in "SerDes" aka 1000base-X mode. The
question is... where did that statement come from, and should we be
doing that for 1000base-X mode anyway?

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Re: [PATCH RFC net-next 1/2] net: pcs: xpcs: Add special code to operate in Microchip KSZ9477 switch
Posted by Vladimir Oltean 1 year ago
On Tue, Jan 28, 2025 at 04:32:07PM +0000, Russell King (Oracle) wrote:
> I note that in the KSZ9477 documentation, bit[0] of VR_MII_DIG_CTRL1
> is marked as "reserved" and takes the value zero, so in the case of
> PHY-side SGMII, (6) always applies to KSZ9477 and (5) never does.

Good point. In human language, this means that when configuring the
KSZ9477 XPCS for the SGMII PHY role, it always expects the contents of
the autoneg code word to come from registers (VR_MII_AN_CTRL,
SR_MII_AN_ADV and SR_MII_CTRL), and never from input wires coming
from blocks external to the XPCS IP (xpcs_sgmii_link_sts_i,
xpcs_sgmii_full_duplex_i, xpcs_sgmii_link_speed_i[1:0]).

With the very important note that said SGMII PHY mode is still
under-specified in Linux, and the discussion about it still needs
to be had.

> This also solves my concern about 2a22b7ae2fa3 ("net: pcs: xpcs: adapt
> Wangxun NICs for SGMII mode"), because there (5) would apply.

Ok, so Wangxun shimmied in an operating mode where the XPCS does act
like a PHY, but receives the info about how to populate the autoneg
code word from wires, not from registers, and that's why Tristram is
noticing that the driver does not write the registers.

Not great, but at least the info we're gathering seems consistent thus far.

> > So my understanding is that SR_MII_AN_ADV needs to be written only if
> > TX_CONFIG=1 (SJA1105 calls this AUTONEG_CONTROL[PHY_MODE]).
> 
> Yes, agreed. Thankfully, SJA1105 sets PHY_MODE/TX_CONFIG=0 and leaves
> SGMII_PHY_MODE_CTRL unaltered. TXGBE sets TX_CONFIG=1 but sets
> SGMII_PHY_MODE_CTRL=1 which also avoids this requirement.

Correct, for SJA1105 I realized the mode where PHY_MODE/TX_CONFIG=1
isn't supported in phylink, and I didn't consider it important enough to
raise the issue, so it's simply not configurable that way. I was thinking,
just like we have phy-mode = "rev-mii" and "rev-rmii" for MII and RMII
in the role of a PHY, that something like "rev-sgmii" would be the way
to explore. But I really have no interest or use case to explore this myself.

> > That's quite
> > different, and that will make sense when you consider that there's also
> > a table with the places the autoneg code word gets its info from:
> > 
> > Config_Reg Bits in the 1000BASE-X and SGMII Mode
> > 
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | Config_Reg bit | 1000Base-X mode   | SGMII mode value   | SGMII mode value                           |
> >  |                |                   | when TX_CONFIG = 0 | when TX_CONFIG = 1                         |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 15             | Next page support | 0                  | Link up or down.                           |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
> >  |                |                   |                    | this bit is derived from Bit 4             |
> >  |                |                   |                    | (SGMII_LINK_STS) of the VR_MII_AN_CTRL.    |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
> >  |                |                   |                    | this bit is derived from the input port    |
> >  |                |                   |                    | 'xpcs_sgmii_link_sts_i'                    |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 14             | ACK               | 1                  | 1                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 13             | RF[1]             | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 12             | RF[0]             | 0                  | FULL_DUPLEX                                |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
> >  |                |                   |                    | this bit is derived from Bit 5 (FD) of     |
> >  |                |                   |                    | the SR_MII_AN_ADV.                         |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
> >  |                |                   |                    | this bit is derived from the input port    |
> >  |                |                   |                    | 'xpcs_sgmii_full_duplex_i'                 |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 11:10          | Reserved          | 0                  | SPEED                                      |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 0, |
> >  |                |                   |                    | these bits are derived from Bit 13 (SS13)  |
> >  |                |                   |                    | and Bit 6 (SS6) of the SR_MII_CTRL.        |
> >  |                |                   |                    | If DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL == 1, |
> >  |                |                   |                    | this bit is derived from the input port    |
> >  |                |                   |                    | 'xpcs_sgmii_link_speed_i[1:0]'             |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 9              | Reserved          | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 8:7            | PAUSE[1:0]        | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 6              | HALF_DUPLEX       | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 5              | FULL_DUPLEX       | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 4:1            | Reserved          | 0                  | 0                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> >  | 0              | Reserved          | 1                  | 1                                          |
> >  +----------------+-------------------+--------------------+--------------------------------------------+
> > 
> > I haven't figured out either what might be going on with the KSZ9477
> > integration, I just made a quick refresher and I thought this might be
> > useful for you as well, Russell. I do notice Tristram does force
> > TX_CONFIG=1 (DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII), but I don't understand
> > what's truly behind that. Hopefully just a misunderstanding.
> 
> If you want to peek at the KSZ9477 documentation, what I'm looking at is
> available from here:
> https://www.microchip.com/en-us/product/ksz9477#Documentation

Thanks.

> Interestingly, there are a number of errata:
> 
> Module 7 - SGMII auto-negotiation does not set bit 0 in the
> auto-negotiation code word
> Basically requires MII_ADVERTISE to be written as 0x1a0, and is only
> needed after power-up or reset.

Ok, I guess 0x1a0 would otherwise be the SR_MII_AN_ADV default value -
Nothing looks special about it. The layout of this register is the
table above. Bits 8:7 (PAUSE) and 5 (FULL_DUPLEX) are set, the rest
(NEXT_PAGE, REMOTE_FAULT, HALF_DUPLEX) are unset.

It's odd that writing this register would fix anything, especially
seeing that it isn't documented anywhere that bit 0 of the autoneg code
word would ever originate from SR_MII_AN_ADV in the 2 SGMII mode columns,
or would be affected by a modification of that register. But ok, I can't
contradict that...

It is under-specified whether the erratum occurs when TX_CONFIG is 1 or 0.
Unless specified otherwise, I am going to assume both modes.

I believe this erratum workaround should be implemented by Tristram;
I'm not seeing it in this patch.

> Module 8 - SGMII port link details from the connected SGMII PHY are not
> passed properly to the port 7 GMAC
> Basically, AUTONEG_INTR_STATUS needs to be read, and the PCS
> manually programmed for the speed.

Ok, I think this is answering my question to Tristram:
xpcs_link_up_sgmii_1000basex() needs to force the speed in MII_BMCR even
for PHYLINK_PCS_NEG_INBAND_ENABLED, where that otherwise shouldn't be
needed. It means that VR_MII_DIG_CTRL1[MAC_AUTO_SW] doesn't work? Bit 9
of "SGMII DIGITAL CONTROL REGISTER" is also hidden, and marked as reserved (0).

A reference to the KSZ9477 errata sheet module 8 in the code would be
nice. To me that is sufficient.

> Module 15 - SGMII registers are not initialized by hardware reset
> Requires that bit 15 of BASIC_CONTROL is set to reset the registers.

Nothing actionable here, the xpcs driver already performs reset.

> All three are not scheduled to be fixed, apparently.
> 
> There seems to be no information listed there regarding the requirement
> for SGMII PHY mode.

Not that I can find either.

> > Tristram, why do you set this field to 1? Linux only supports the
> > configuration where a MAC behaves like a MAC. There needs to be an
> > entire discussion if you want to configure a MAC to be a SGMII autoneg
> > master (like a PHY), how to model that.
> 
> (Using SJA1105 register terminology...)
> 
> Looking at the patch, Tristram is setting PHY_MODE=1 and SGMII_LINK=1
> not when configuring for SGMII, but when configuring for 1000base-X.

Yeah, you're right... (?!) Again, misunderstanding, I hope?

> This is reflected in the documentation for KSZ9477 - which states that
> both these bits need to be set in "SerDes" aka 1000base-X mode. The
> question is... where did that statement come from, and should we be
> doing that for 1000base-X mode anyway?

Here, because SJA1105 only supports SGMII and _not_ 1000Base-X, I don't
have practical experience. Thus I can only refer you to:

Programming Guidelines for Clause 37 Auto-Negotiation

The Clause 37 auto-negotiation is enabled only when Bit 12 of the
SR_MII_CTRL Register is set. For Backplane Ethernet configurations,
the default value of this bit is 0. For all other configurations, the
default value of this bit is 1. When this bit is enabled, the Clause 37
auto-negotiation is initiated on the following events:
- Power on Reset
- Soft Reset

The DWC_xpcs initiates the auto-negotiation on the following events:
- When the link partner requests auto-negotiation by transmitting
  configuration code groups.
- When the receive data path loses code group synchronization for more
  than 10 ms (in 1000BASE-X mode) or 1.6 ms (in SGMII mode).
- When an error condition is detected while receiving the /C/ or /I/
  order sets.
- When the host or software requests auto-negotiation by setting Bit 9
  in the SR_MII_CTRL Register.

The following list explains the auto-negotiation process:

1. The DWC_xpcs starts auto-negotiation by first transmitting the
   configuration words with all zeroes for 10 ms (1.6 ms for the SGMII
   interface).

2. The SR_MII_AN_ADV Register content is transmitted in the
   configuration words in the 1000BASE-X mode. In the SGMII mode, the
   values given in the "Config_Reg Bits in the 1000BASE-X and SGMII Mode"
   table are transmitted in the configuration word. The auto-negotiation
   is complete when both link partners have exchanged their base pages.

3. DWC_xpcs generates an interrupt on completion (sbd_intr_o) of
   auto-negotiation when Bit 0 of VR_MII_AN_CTRL Register is set to 1.

4. The auto-negotiation completion is indicated in the VR_MII_AN_INTR_STS
   register.

Note: In configurations with MDIO interface, you must read the
      VR_MII_AN_INTR_STS register after the auto-negotiation is complete.
      Auto-negotiation Status reads incorrect value when Status is not
      read in the previous Auto-negotiation session and MDC clock is not
      active when the Management is not accessing the PHY/PCS registers.
      Because of this limitation, Management may get incorrect
      information of Link partner and this can cause link failure.
      However, this limitation is not visible if the programming
      guidelines are followed as mentioned in the databook. This is
      because the minimum time between two successive auto-negotiations
      has at least 3.2ms and Management or Host is expected to read the
      current status of the Auto-negotiation before the next auto-negotiation
      is completed.

5. In the MAC attached to the DWC_xpcs, the Transmit and Receive Flow
   Control mode is determined based on the capabilities of the partner
   (given in SR_MII_LP_BABL) and the half-duplex or full-duplex
   operating mode.

   In SGMII auto-negotiation, the received link status such as speed,
   duplex mode is also given in the VR_MII_AN_INTR_STS Register.

   When Clause 37 auto-negotiation is complete, DWC_xpcs automatically
   resolves the duplex mode by selecting the duplex mode of its link
   partner. If auto-negotiation is disabled, the DWC_xpcs selects the
   duplex mode defined in Bit 8 of SR_MII_CTRL Register.

Nothing, in my reading, suggests to me that DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII
would be considered by the XPCS when operating in 1000Base-X mode
(DW_VR_MII_PCS_MODE_MASK == DW_VR_MII_PCS_MODE_C37_1000BASEX).