arch/arm64/boot/dts/rockchip/Makefile | 5 + .../rockchip/rk3588-jaguar-ethernet-switch.dtso | 192 +++++++++++++++++++++ 2 files changed, 197 insertions(+)
From: Quentin Schulz <quentin.schulz@cherry.de>
This adds support for the Ethernet Switch adapter connected through the
mezzanine connector on RK3588 Jaguar.
This adapter has a KSZ9896 Ethernet Switch with 4 1GbE Ethernet
connectors, two user controllable LEDs, and an M12 12-pin connector
which exposes the following signals:
- RS232/RS485 (max 250Kbps/500Kbps, RX pin1, TX pin2)
- two digital inputs (pin4 routed to GPIO3_C5 on SoC, pin5 to GPIO4_B4)
- two digital outputs (pin7 routed to GPIO3_D3 on SoC, pin8 to
GPIO3_D1)
- two analog inputs (pin10 to channel1 of ADS1015, pin11 to channel2)
Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
---
Note that for this to work, you need this commit:
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=ba54bce747fa9e07896c1abd9b48545f7b4b31d2
otherwise most packets are ignored by the DSA switch driver.
Note that looking into downstream kernel it seems like they handle the
absence of rx_delay and tx_delay DT properties differently than us in
the GMAC driver.
rx_delay and tx_delay default to -1 instead of 0x10/0x30 for us. For
rgmii-{,tx,rx}id phy-mode, -1 is passed instead of 0.
If the value is negative for the delay, then the delay circuitry is
disabled entirely. This differs from us simply putting 0 in there (but
still have the circuitry enabled). If that results in the same thing, I
do not know yet.
@Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000
should do the trick to disable the circuitry according to the TRM?
---
Changes in v2:
- removed patch 1 adding ethernet1 alias to jaguar base DTS,
- added ethernet1 alias in the DTSO,
- change rgmii phy-mode to rgmii-id and have the delay in the PHY
instead, as suggested by Andrew,
- Link to v1: https://lore.kernel.org/r/20250521-jaguar-mezz-eth-switch-v1-0-9b5c48ebb867@cherry.de
---
arch/arm64/boot/dts/rockchip/Makefile | 5 +
.../rockchip/rk3588-jaguar-ethernet-switch.dtso | 192 +++++++++++++++++++++
2 files changed, 197 insertions(+)
diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile
index 3e8771ef69ba1c1428117cc2ae29b84e13523e21..6d5ad354b77de1c3f995b119f97541f9c2cc9dbd 100644
--- a/arch/arm64/boot/dts/rockchip/Makefile
+++ b/arch/arm64/boot/dts/rockchip/Makefile
@@ -151,6 +151,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-firefly-itx-3588j.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-friendlyelec-cm3588-nas.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-h96-max-v58.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-jaguar.dtb
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-jaguar-ethernet-switch.dtbo
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-jaguar-pre-ict-tester.dtbo
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-mnt-reform2.dtb
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-nanopc-t6.dtb
@@ -222,6 +223,10 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-edgeble-neu6b-wifi.dtb
rk3588-edgeble-neu6b-wifi-dtbs := rk3588-edgeble-neu6b-io.dtb \
rk3588-edgeble-neu6a-wifi.dtbo
+dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-jaguar-ethernet-switch.dtb
+rk3588-jaguar-ethernet-switch-dtbs := rk3588-jaguar.dtb \
+ rk3588-jaguar-ethernet-switch.dtbo
+
dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588-jaguar-pre-ict-tester.dtb
rk3588-jaguar-pre-ict-tester-dtbs := rk3588-jaguar.dtb \
rk3588-jaguar-pre-ict-tester.dtbo
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-jaguar-ethernet-switch.dtso b/arch/arm64/boot/dts/rockchip/rk3588-jaguar-ethernet-switch.dtso
new file mode 100644
index 0000000000000000000000000000000000000000..4d5c3249121c0483d19dcc3d51dfd57d7ce2c8ee
--- /dev/null
+++ b/arch/arm64/boot/dts/rockchip/rk3588-jaguar-ethernet-switch.dtso
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2025 Cherry Embedded Solutions GmbH
+ *
+ * Device Tree Overlay for the Ethernet Switch adapter for the Mezzanine
+ * connector on RK3588 Jaguar.
+ *
+ * This adapter has a KSZ9896 Ethernet Switch with 4 1GbE Ethernet connectors,
+ * two user controllable LEDs, and an M12 12-pin connector which exposes the
+ * following signals:
+ * - RS232/RS485 (max 250Kbps/500Kbps, RX pin1, TX pin2)
+ * - two digital inputs (pin4 routed to GPIO3_C5 on SoC, pin5 to GPIO4_B4)
+ * - two digital outputs (pin7 routed to GPIO3_D3 on SoC, pin8 to GPIO3_D1)
+ * - two analog inputs (pin10 to channel1 of ADS1015, pin11 to channel2)
+ *
+ * RK3588 Jaguar can be powered entirely through the adapter via the M8 3-pin
+ * connector (12-24V).
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/clock/rockchip,rk3588-cru.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/pinctrl/rockchip.h>
+
+&{/} {
+ aliases {
+ ethernet1 = "/ethernet@fe1c0000";
+ };
+
+ mezzanine-leds {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&led_usr1_pin &led_usr2_pin>;
+
+ led-1 {
+ gpios = <&gpio1 RK_PC1 GPIO_ACTIVE_HIGH>;
+ label = "USR1";
+ };
+
+ led-2 {
+ gpios = <&gpio3 RK_PC4 GPIO_ACTIVE_HIGH>;
+ label = "USR2";
+ };
+ };
+};
+
+&gmac1 {
+ clock_in_out = "output";
+ phy-mode = "rgmii-id";
+ pinctrl-names = "default";
+ pinctrl-0 = <&gmac1_rx_bus2
+ &gmac1_tx_bus2
+ &gmac1_rgmii_clk
+ &gmac1_rgmii_bus
+ ð1_pins>;
+ rx_delay = <0x0>;
+ tx_delay = <0x0>;
+ status = "okay";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+};
+
+&i2c1 {
+ #address-cells = <1>;
+ /* ADS1015 can handle high-speed (HS) mode (up to 3.4MHz) on I2C bus,
+ but SOC can handle only up to 400kHz. */
+ clock-frequency = <400000>;
+ #size-cells = <0>;
+ status = "okay";
+
+ adc@48 {
+ compatible = "ti,ads1015";
+ reg = <0x48>;
+ #address-cells = <1>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <RK_PC7 IRQ_TYPE_EDGE_FALLING>;
+ pinctrl-0 = <&adc_alert>;
+ pinctrl-names = "default";
+ #io-channel-cells = <1>;
+ #size-cells = <0>;
+
+ channel@1 {
+ reg = <5>; /* Single-ended between AIN1 and GND */
+ ti,datarate = <0>;
+ ti,gain = <5>;
+ };
+
+ channel@2 {
+ reg = <6>; /* Single-ended between AIN2 and GND */
+ ti,datarate = <0>;
+ ti,gain = <5>;
+ };
+ };
+
+ switch@5f {
+ compatible = "microchip,ksz9896";
+ reg = <0x5f>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <RK_PB7 IRQ_TYPE_EDGE_FALLING>; /* ETH_INTRP_N */
+ pinctrl-0 = <ð_reset_n ð_intrp_n>;
+ pinctrl-names = "default";
+ reset-gpios = <&gpio3 RK_PB6 GPIO_ACTIVE_LOW>; /* ETH_RESET */
+ microchip,synclko-disable; /* CLKO_25_125 only routed to TP1 */
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lan1: port@0 {
+ reg = <0>;
+ label = "ETH1";
+ };
+
+ lan2: port@1 {
+ reg = <1>;
+ label = "ETH2";
+ };
+
+ lan3: port@2 {
+ reg = <2>;
+ label = "ETH3";
+ };
+
+ lan4: port@3 {
+ reg = <3>;
+ label = "ETH4";
+ };
+
+ port@5 {
+ reg = <5>;
+ ethernet = <&gmac1>;
+ label = "CPU";
+ phy-mode = "rgmii-id";
+ rx-internal-delay-ps = <2000>;
+ tx-internal-delay-ps = <2000>;
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
+};
+
+&pinctrl {
+ adc {
+ adc_alert: adc-alert-irq {
+ rockchip,pins =
+ <3 RK_PC7 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
+ ethernet {
+ eth_intrp_n: eth-intrp-n {
+ rockchip,pins =
+ <3 RK_PB7 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ eth_reset_n: eth-reset-n {
+ rockchip,pins =
+ <3 RK_PB6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ leds {
+ led_usr1_pin: led-usr1-pin {
+ rockchip,pins =
+ <1 RK_PC1 RK_FUNC_GPIO &pcfg_pull_down>;
+ };
+
+ led_usr2_pin: led-usr2-pin {
+ rockchip,pins =
+ <3 RK_PC4 RK_FUNC_GPIO &pcfg_pull_down>;
+ };
+ };
+};
+
+&uart9 {
+ /* GPIO3_D0/EN_RS485_MODE for switching between RS232 and RS485 */
+ pinctrl-0 = <&uart9m2_xfer &uart9m2_rtsn>;
+ pinctrl-names = "default";
+ linux,rs485-enabled-at-boot-time;
+ status = "okay";
+};
---
base-commit: 4a95bc121ccdaee04c4d72f84dbfa6b880a514b6
change-id: 20250521-jaguar-mezz-eth-switch-75445fd1c725
Best regards,
--
Quentin Schulz <quentin.schulz@cherry.de>
> @Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000 > should do the trick to disable the circuitry according to the TRM? I measured TXCLK vs TXD3 on an oscilloscope on gmac1: Setting Decimal Actual TXCLK delay (ps) 00 0 47 0a 10 283 10 16 440 20 32 893 30 48 1385 40 64 1913 50 80 2514 60 96 3077 70 112 3565 7f 127 4009 off x -315 Setting = tx_delay (hex) Decimal = tx_delay (dec) Actual TXCLK delay (ps) = Measurement from oscilloscope Plotting this we can deduce that one tx_delay unit is about 31ps. We can also see that turning off the delay unit does not give us zero delay, but setting tx_delay to zero does (almost). I did this on the Rockchip 6.1 kernel because it allows to set the delays on runtime via sysfs ( https://gitlab.com/firefly-linux/docs/-/blob/rk356x/firefly/Common/GMAC/Rockchip_Developer_Guide_Linux_GMAC_RGMII_Delayline_EN.pdf ) but I don't expect it to make a difference. Regards, Jakob
On Tue, May 27, 2025 at 03:11:42PM +0200, Jakob Unterwurzacher wrote:
> > @Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000
> > should do the trick to disable the circuitry according to the TRM?
>
> I measured TXCLK vs TXD3 on an oscilloscope on gmac1:
>
> Setting Decimal Actual TXCLK delay (ps)
> 00 0 47
> 0a 10 283
> 10 16 440
> 20 32 893
> 30 48 1385
> 40 64 1913
> 50 80 2514
> 60 96 3077
> 70 112 3565
> 7f 127 4009
>
> off x -315
>
> Setting = tx_delay (hex)
> Decimal = tx_delay (dec)
> Actual TXCLK delay (ps) = Measurement from oscilloscope
>
> Plotting this we can deduce that one tx_delay unit is about 31ps.
Nice to see somebody actually do the measurements. Based on this, it
would be good to implement:
tx-internal-delay-ps:
description:
RGMII Transmit Clock Delay defined in pico seconds. This is used for
controllers that have configurable TX internal delays. If this
property is present then the MAC applies the TX delay.
For the moment, please limit it to just the device you measured it on.
Andrew
Hi Andrew, On 5/27/25 6:18 PM, Andrew Lunn wrote: > On Tue, May 27, 2025 at 03:11:42PM +0200, Jakob Unterwurzacher wrote: >>> @Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000 >>> should do the trick to disable the circuitry according to the TRM? >> >> I measured TXCLK vs TXD3 on an oscilloscope on gmac1: >> >> Setting Decimal Actual TXCLK delay (ps) >> 00 0 47 >> 0a 10 283 >> 10 16 440 >> 20 32 893 >> 30 48 1385 >> 40 64 1913 >> 50 80 2514 >> 60 96 3077 >> 70 112 3565 >> 7f 127 4009 >> >> off x -315 >> >> Setting = tx_delay (hex) >> Decimal = tx_delay (dec) >> Actual TXCLK delay (ps) = Measurement from oscilloscope >> >> Plotting this we can deduce that one tx_delay unit is about 31ps. > > Nice to see somebody actually do the measurements. Based on this, it > would be good to implement: > > tx-internal-delay-ps: > description: > RGMII Transmit Clock Delay defined in pico seconds. This is used for > controllers that have configurable TX internal delays. If this > property is present then the MAC applies the TX delay. > > For the moment, please limit it to just the device you measured it on. > What exactly do you mean with "limit it to just the device you measured it on"? I'll need to implement reading the delay from the stmmac driver to use this property, do I need to restrict reading this property to the SoC we tested (RK3588)? Or should I just apply it indiscriminately (considering that no Rockchip board actually set this property in its DT?) and let future users fix up the scale for the other SoCs whenever they want to use this property? I assume you're then expecting tx-internal-delay-ps only on this new DTSO's gmac1? Would you still want rx_delay/tx_delay to be set to 0x00? Maybe only rx_delay since we won't have a companion rx-internal-delay-ps for now (until someone from Rockchip answers :); adding Kever back to the Cc for that)? Or should I remove both of them? Is this request blocking the merging of this DTSO patch or would a follow-up series be okay? Cheers, Quentin
On Wed, May 28, 2025 at 09:56:51AM +0200, Quentin Schulz wrote: > Hi Andrew, > > On 5/27/25 6:18 PM, Andrew Lunn wrote: > > On Tue, May 27, 2025 at 03:11:42PM +0200, Jakob Unterwurzacher wrote: > > > > @Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000 > > > > should do the trick to disable the circuitry according to the TRM? > > > > > > I measured TXCLK vs TXD3 on an oscilloscope on gmac1: > > > > > > Setting Decimal Actual TXCLK delay (ps) > > > 00 0 47 > > > 0a 10 283 > > > 10 16 440 > > > 20 32 893 > > > 30 48 1385 > > > 40 64 1913 > > > 50 80 2514 > > > 60 96 3077 > > > 70 112 3565 > > > 7f 127 4009 > > > > > > off x -315 > > > > > > Setting = tx_delay (hex) > > > Decimal = tx_delay (dec) > > > Actual TXCLK delay (ps) = Measurement from oscilloscope > > > > > > Plotting this we can deduce that one tx_delay unit is about 31ps. > > > > Nice to see somebody actually do the measurements. Based on this, it > > would be good to implement: > > > > tx-internal-delay-ps: > > description: > > RGMII Transmit Clock Delay defined in pico seconds. This is used for > > controllers that have configurable TX internal delays. If this > > property is present then the MAC applies the TX delay. > > > > For the moment, please limit it to just the device you measured it on. > > > > What exactly do you mean with "limit it to just the device you measured it > on"? Nobody seems to know if rx_delay & tx_delay operate the same across the whole range of SoCs. I don't particularly care if these properties are difference between SoC, they are vendor properties, with undocumented magic values. However 'tx-internal-delay-ps' is standardised, and has a very clear meaning. I don't want it used unless somebody has performed a measurement and we know that 2000 produces a 2ns delay. > I'll need to implement reading the delay from the stmmac driver to use this > property, do I need to restrict reading this property to the SoC we tested > (RK3588)? Yes, please only allow it to be used on RK3588, and any other SoC you can test and verify its behaviour. > I assume you're then expecting tx-internal-delay-ps only on this new DTSO's > gmac1? I would like rx_delay and tx_delay to be marked deprecated, with the end goal they are no longer used, the standard properties take there place. But to get there, we need more measurements from real hardware, or some other way to be sure what we have the correct delay. > Would you still want rx_delay/tx_delay to be set to 0x00? Maybe only > rx_delay since we won't have a companion rx-internal-delay-ps for now (until > someone from Rockchip answers :); adding Kever back to the Cc for that)? Or > should I remove both of them? rx-internal-delay-ps and rx_delay should be mutually exclusive. If both are present -EINVAL. We have to keep supporting rx_delay for rk3588 otherwise we break backwards compatibility, but ideally no more instances of it should be added once rx-internal-delay-ps is added. Andrew
Hi Andrew,
On 5/28/25 3:09 PM, Andrew Lunn wrote:
> On Wed, May 28, 2025 at 09:56:51AM +0200, Quentin Schulz wrote:
>> Hi Andrew,
>>
>> On 5/27/25 6:18 PM, Andrew Lunn wrote:
>>> On Tue, May 27, 2025 at 03:11:42PM +0200, Jakob Unterwurzacher wrote:
>>>>> @Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000
>>>>> should do the trick to disable the circuitry according to the TRM?
>>>>
>>>> I measured TXCLK vs TXD3 on an oscilloscope on gmac1:
>>>>
>>>> Setting Decimal Actual TXCLK delay (ps)
>>>> 00 0 47
>>>> 0a 10 283
>>>> 10 16 440
>>>> 20 32 893
>>>> 30 48 1385
>>>> 40 64 1913
>>>> 50 80 2514
>>>> 60 96 3077
>>>> 70 112 3565
>>>> 7f 127 4009
>>>>
>>>> off x -315
>>>>
>>>> Setting = tx_delay (hex)
>>>> Decimal = tx_delay (dec)
>>>> Actual TXCLK delay (ps) = Measurement from oscilloscope
>>>>
>>>> Plotting this we can deduce that one tx_delay unit is about 31ps.
>>>
>>> Nice to see somebody actually do the measurements. Based on this, it
>>> would be good to implement:
>>>
>>> tx-internal-delay-ps:
>>> description:
>>> RGMII Transmit Clock Delay defined in pico seconds. This is used for
>>> controllers that have configurable TX internal delays. If this
>>> property is present then the MAC applies the TX delay.
>>>
>>> For the moment, please limit it to just the device you measured it on.
>>>
>>
>> What exactly do you mean with "limit it to just the device you measured it
>> on"?
>
> Nobody seems to know if rx_delay & tx_delay operate the same across
> the whole range of SoCs. I don't particularly care if these properties
> are difference between SoC, they are vendor properties, with
> undocumented magic values. However 'tx-internal-delay-ps' is
> standardised, and has a very clear meaning. I don't want it used
> unless somebody has performed a measurement and we know that 2000
> produces a 2ns delay.
>
>> I'll need to implement reading the delay from the stmmac driver to use this
>> property, do I need to restrict reading this property to the SoC we tested
>> (RK3588)?
>
> Yes, please only allow it to be used on RK3588, and any other SoC you
> can test and verify its behaviour.
Coming back to this topic, I'm unfortunately the bearer of some bad news.
I implemented the suggested logic (see at the end of this mail) and then
went to validate it with Jakob's help. Unfortunately, it seems that the
delay value really isn't stable or reliable.
We tested the same adapter with two different main boards (the same
product, just two different units). With a value of 0x40 for tx_delay
(which should be ~2000ps if we have a 31ps per tx_delay unit as
empirically decided), we have one board with 1778ps and one with 1391ps.
Following a hunch, we started to stress (or cool) the device (with
stress-ng/a fan) and it did slightly change the result too. Changing the
CPU operating points (and by extension at least CPU clocks) didn't
impact the result though.
While this could be observed with tx_delay property too, this property
doesn't claim to provide a value in picoseconds that
tx-internal-delay-ps would (but at the same time this didn't stop it to
be implemented for the DSA switch we have which claims "more than 1.5ns"
and nothing more, so maybe that would be acceptable?).
I feel uncomfortable contributing this considering the wildly different
results across our very small test sample pool of two units and slightly
different operating temperature.
Cheers,
Quentin
Here's the patch I made:
"""
From 4ad2976b08b0ca93504942a80aba4bfe76fdbc51 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@cherry.de>
Date: Wed, 4 Jun 2025 14:12:01 +0200
Subject: [PATCH] net: stmmac: dwmac-rk: add support for tx-internal-delay-ps
for RK3588
The RGMII v2.0 spec[1] specifies the clock output needs to be delayed by
at least 1.2ns (typically 2.0ns) compared to the data lanes.
As documented[2], there are multiple ways to implement this delay. The
PCB routing adds this delay by making the clock lane longer than the
data lanes. The MAC adds the delay. The PHY adds the delay. Mixed delays
are heavily discouraged. The first one is represented by rgmii phy-mode,
the second and third one as rgmii-id. Since RGMII has two clocks (RX and
TX), it is possible their delay implementation differ. In which case,
rgmii-rxid represents RX delay added by the MAC/PHY while TX delay is
added by way of PCB routing, and rgmii-txid represents the opposite.
The Rockchip DWMAC driver actually implements the RGMII internal delay
the complete opposite way.
The driver allows to specify the RX and TX delay (using an undocumented
and yet unknown unit) through the Device Tree (via rx_delay and
tx_delay). However, rx_delay is only applied when in rgmii or rgmii-txid
mode and tx_delay only when in rgmii or rgmii-rxid.
This means the driver is currently entirely incompatible with the Device
Tree binding as in order to add a delay on the MAC we need to set
phy-mode on the MAC side to rgmii instead of rgmii-id, rgmii-rxid
instead of rgmii-txid or vice-versa, along with rx_delay/tx_delay
properties.
Because we cannot break backward compatibility with existing Device
Trees, an attempt to fix this requires a new property. Fortunately, the
Ethernet controller binding already defines such property:
tx-internal-delay-ps and rx-internal-delay-ps. Those represent delays in
picoseconds.
Unfortunately, there's no documentation as far as I could tell about the
relation between an rx_delay/tx_delay to write in the DWMAC registers on
Rockchip and the actual delay in picoseconds.
However, based on empirical data[3], it seems a unit in the
gmac{0,1}_clk_tx_dl_cfg bitfield SYS_GRF_SOC_CON{8,9} equals roughly 31
picoseconds of delay on RK3588. Considering that the RGMII spec only
requires a delay of at least 1200ps, a granularity of 31ps seems
acceptable even if it later turns out the scale isn't perfectly linear.
Users will typically set tx-internal-delay-ps above 1200 (in
tx_delay "units": above 0x27).
So this adds support for setting up the delay via tx-internal-delay-ps
on RK3588 only as there's no empirical data for other SoCs just yet and
only for TX delay as we cannot measure RX delay within the SoC.
Setting both tx_delay and tx-internal-delay-ps is nonsensical as they
represent the same delay but in entirely different HW setups that are
defined in Device Tree, so this is forbidden by the driver.
Only print error messages if tx_delay is absent AND tx-internal-delay-ps
is absent as well. Still allow tx_delay to be absent for backward
compatibility.
If one really has a PCB which only adds an RX delay, one unfortunately
still needs to set rx_delay to 0 in Device Tree in addition to setting
the phy-mode to rgmii-txid as a missing rx_delay property will make the
RX delay in the MAC default to 0x10.
If tx-internal-delay-ps is missing, tx_delay_from_ps is guaranteed to be
0 thanks to kzalloc allocation of bsp_priv rk_priv_data struct so
current behavior is left unchanged for existing Device Trees and SoCs
which do not currently support tx-internal-delay-ps (all but RK3588).
If one has a board which adds the TX delay on the PHY side, setting
phy-mode to rgmii-txid/rgmii-id in the Device Tree is technically enough
as tx-internal-delay-ps = <0> or a missing tx-internal-delay-ps will
result in the same behavior.
[1] https://etherealwake.com/2025/03/ethernet-rgmii/rgmii_2_0.pdf
[2]
https://elixir.bootlin.com/linux/v6.15/source/Documentation/devicetree/bindings/net/ethernet-controller.yaml#L287
[3]
https://lore.kernel.org/linux-rockchip/20250527131142.1100673-1-jakob.unterwurzacher@cherry.de/
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
---
.../net/ethernet/stmicro/stmmac/dwmac-rk.c | 29 +++++++++++++++----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 700858ff6f7c3..63730a334f0a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -77,6 +77,7 @@ struct rk_priv_data {
struct reset_control *phy_reset;
int tx_delay;
+ int tx_delay_from_ps;
int rx_delay;
struct regmap *grf;
@@ -1670,6 +1671,7 @@ static struct rk_priv_data *rk_gmac_setup(struct
platform_device *pdev,
{
struct rk_priv_data *bsp_priv;
struct device *dev = &pdev->dev;
+ bool of_internal_delay_ps;
struct resource *res;
int ret;
const char *strings = NULL;
@@ -1718,13 +1720,30 @@ static struct rk_priv_data *rk_gmac_setup(struct
platform_device *pdev,
bsp_priv->clock_input = false;
}
+ of_internal_delay_ps = false;
+
+ if (device_is_compatible(dev, "rockchip,rk3588-gmac")) {
+ ret = of_property_read_u32(dev->of_node, "tx-internal-delay-ps", &value);
+ if (!ret) {
+ /* Empirical data only available for RK3588 on TX path */
+ bsp_priv->tx_delay_from_ps = value / 31;
+ of_internal_delay_ps = true;
+ }
+ }
+
ret = of_property_read_u32(dev->of_node, "tx_delay", &value);
if (ret) {
bsp_priv->tx_delay = 0x30;
- dev_err(dev, "Can not read property: tx_delay.");
- dev_err(dev, "set tx_delay to 0x%x\n",
- bsp_priv->tx_delay);
+ if (!of_internal_delay_ps) {
+ dev_err(dev, "Can not read property: tx_delay.");
+ dev_err(dev, "set tx_delay to 0x%x\n",
+ bsp_priv->tx_delay);
+ }
} else {
+ if (of_internal_delay_ps) {
+ dev_err_probe(dev, -EINVAL, "Only one of tx_delay and
tx-internal-delay-ps must exist\n");
+ return ERR_PTR(-EINVAL);
+ }
dev_info(dev, "TX delay(0x%x).\n", value);
bsp_priv->tx_delay = value;
}
@@ -1821,7 +1840,7 @@ static int rk_gmac_powerup(struct rk_priv_data
*bsp_priv)
break;
case PHY_INTERFACE_MODE_RGMII_ID:
dev_info(dev, "init for RGMII_ID\n");
- bsp_priv->ops->set_to_rgmii(bsp_priv, 0, 0);
+ bsp_priv->ops->set_to_rgmii(bsp_priv, bsp_priv->tx_delay_from_ps, 0);
break;
case PHY_INTERFACE_MODE_RGMII_RXID:
dev_info(dev, "init for RGMII_RXID\n");
@@ -1829,7 +1848,7 @@ static int rk_gmac_powerup(struct rk_priv_data
*bsp_priv)
break;
case PHY_INTERFACE_MODE_RGMII_TXID:
dev_info(dev, "init for RGMII_TXID\n");
- bsp_priv->ops->set_to_rgmii(bsp_priv, 0, bsp_priv->rx_delay);
+ bsp_priv->ops->set_to_rgmii(bsp_priv, bsp_priv->tx_delay_from_ps,
bsp_priv->rx_delay);
break;
case PHY_INTERFACE_MODE_RMII:
dev_info(dev, "init for RMII\n");
"""
On Fri, Jun 13, 2025 at 04:27:54PM +0200, Quentin Schulz wrote: > Hi Andrew, > > On 5/28/25 3:09 PM, Andrew Lunn wrote: > > On Wed, May 28, 2025 at 09:56:51AM +0200, Quentin Schulz wrote: > > > Hi Andrew, > > > > > > On 5/27/25 6:18 PM, Andrew Lunn wrote: > > > > On Tue, May 27, 2025 at 03:11:42PM +0200, Jakob Unterwurzacher wrote: > > > > > > @Jakob, is this something you could check? devmem2 0xfd58c31c w 0x3c0000 > > > > > > should do the trick to disable the circuitry according to the TRM? > > > > > > > > > > I measured TXCLK vs TXD3 on an oscilloscope on gmac1: > > > > > > > > > > Setting Decimal Actual TXCLK delay (ps) > > > > > 00 0 47 > > > > > 0a 10 283 > > > > > 10 16 440 > > > > > 20 32 893 > > > > > 30 48 1385 > > > > > 40 64 1913 > > > > > 50 80 2514 > > > > > 60 96 3077 > > > > > 70 112 3565 > > > > > 7f 127 4009 > > > > > > > > > > off x -315 > > > > > > > > > > Setting = tx_delay (hex) > > > > > Decimal = tx_delay (dec) > > > > > Actual TXCLK delay (ps) = Measurement from oscilloscope > > > > > > > > > > Plotting this we can deduce that one tx_delay unit is about 31ps. > > > > > > > > Nice to see somebody actually do the measurements. Based on this, it > > > > would be good to implement: > > > > > > > > tx-internal-delay-ps: > > > > description: > > > > RGMII Transmit Clock Delay defined in pico seconds. This is used for > > > > controllers that have configurable TX internal delays. If this > > > > property is present then the MAC applies the TX delay. > > > > > > > > For the moment, please limit it to just the device you measured it on. > > > > > > > > > > What exactly do you mean with "limit it to just the device you measured it > > > on"? > > > > Nobody seems to know if rx_delay & tx_delay operate the same across > > the whole range of SoCs. I don't particularly care if these properties > > are difference between SoC, they are vendor properties, with > > undocumented magic values. However 'tx-internal-delay-ps' is > > standardised, and has a very clear meaning. I don't want it used > > unless somebody has performed a measurement and we know that 2000 > > produces a 2ns delay. > > > > > I'll need to implement reading the delay from the stmmac driver to use this > > > property, do I need to restrict reading this property to the SoC we tested > > > (RK3588)? > > > > Yes, please only allow it to be used on RK3588, and any other SoC you > > can test and verify its behaviour. > > Coming back to this topic, I'm unfortunately the bearer of some bad news. > > I implemented the suggested logic (see at the end of this mail) and then > went to validate it with Jakob's help. Unfortunately, it seems that the > delay value really isn't stable or reliable. > > We tested the same adapter with two different main boards (the same product, > just two different units). With a value of 0x40 for tx_delay (which should > be ~2000ps if we have a 31ps per tx_delay unit as empirically decided), we > have one board with 1778ps and one with 1391ps. Following a hunch, we > started to stress (or cool) the device (with stress-ng/a fan) and it did > slightly change the result too. Changing the CPU operating points (and by > extension at least CPU clocks) didn't impact the result though. Thanks for taking such a scientific approach to this. Most developers try values until it works, and call it done. It is nice to see somebody doing some real study. Russell quoted the standard, which says the delay needs to be between 1ns and 2.6ns, which is quite a wide range. So for a tx_delay value of 0x40, nominally 2000ps, your two values are within that range, and so conform to the standard. > While this could be observed with tx_delay property too, this property > doesn't claim to provide a value in picoseconds that tx-internal-delay-ps > would (but at the same time this didn't stop it to be implemented for the > DSA switch we have which claims "more than 1.5ns" and nothing more, so maybe > that would be acceptable?). > > I feel uncomfortable contributing this considering the wildly different > results across our very small test sample pool of two units and slightly > different operating temperature. I can understand that. But there is another way to look at this. I am making a big jump from just two boards, but it seems to me, tx_delay and rx_delay are pointless, if they produce such a wide range of values from what should be identical boards. They cannot be used for fine tuning because the same value has a 387ps difference, which is huge compared to the 31ps step. It seems to me, rx_delay and tx_delay should be deprecated, set to 0, and let the PHY do they delays. If i remember correctly, that is what you ended up with for you board? Andrew
Hi Andrew, On 6/15/25 4:53 PM, Andrew Lunn wrote: > On Fri, Jun 13, 2025 at 04:27:54PM +0200, Quentin Schulz wrote: >> Hi Andrew, >> >> On 5/28/25 3:09 PM, Andrew Lunn wrote: >>> On Wed, May 28, 2025 at 09:56:51AM +0200, Quentin Schulz wrote: >>>> Hi Andrew, >>>> >>>> On 5/27/25 6:18 PM, Andrew Lunn wrote: >>>>> On Tue, May 27, 2025 at 03:11:42PM +0200, Jakob Unterwurzacher wrote: [...] >>>> I'll need to implement reading the delay from the stmmac driver to use this >>>> property, do I need to restrict reading this property to the SoC we tested >>>> (RK3588)? >>> >>> Yes, please only allow it to be used on RK3588, and any other SoC you >>> can test and verify its behaviour. >> >> Coming back to this topic, I'm unfortunately the bearer of some bad news. >> >> I implemented the suggested logic (see at the end of this mail) and then >> went to validate it with Jakob's help. Unfortunately, it seems that the >> delay value really isn't stable or reliable. >> >> We tested the same adapter with two different main boards (the same product, >> just two different units). With a value of 0x40 for tx_delay (which should >> be ~2000ps if we have a 31ps per tx_delay unit as empirically decided), we >> have one board with 1778ps and one with 1391ps. Following a hunch, we >> started to stress (or cool) the device (with stress-ng/a fan) and it did >> slightly change the result too. Changing the CPU operating points (and by >> extension at least CPU clocks) didn't impact the result though. > > Thanks for taking such a scientific approach to this. Most developers > try values until it works, and call it done. It is nice to see > somebody doing some real study. > > Russell quoted the standard, which says the delay needs to be between > 1ns and 2.6ns, which is quite a wide range. So for a tx_delay value of > 0x40, nominally 2000ps, your two values are within that range, and so > conform to the standard. > If there's a source about the 2.6ns being the upper limit, I would appreciate a link to it (or the maths leading to this claim) :) My understanding is: at least 1.2ns. Considering that for 125MHz TXC (for 1GbE), the clock period is 8ns and that two >> While this could be observed with tx_delay property too, this property >> doesn't claim to provide a value in picoseconds that tx-internal-delay-ps >> would (but at the same time this didn't stop it to be implemented for the >> DSA switch we have which claims "more than 1.5ns" and nothing more, so maybe >> that would be acceptable?). >> >> I feel uncomfortable contributing this considering the wildly different >> results across our very small test sample pool of two units and slightly >> different operating temperature. > > I can understand that. But there is another way to look at this. I am > making a big jump from just two boards, but it seems to me, tx_delay > and rx_delay are pointless, if they produce such a wide range of > values from what should be identical boards. They cannot be used for > fine tuning because the same value has a 387ps difference, which is > huge compared to the 31ps step. > I'm making the same conclusion. I'm not sure though it's a good idea to force the user to implement the delay in the PHY only. I can tell you that we have two variants of RK3399 Puma, RK3588 Jaguar and RK3588 Tiger with either a KSZ9031 or a KSZ9131 PHY. As far as I understood from reading the driver, the delay is implemented differently for both PHYs. KSZ9131 adds a DLL-based clock skew (4.9.3.1 in datasheet[1]) in the appropriate direction whenever phy-mode requires a delay be added by MAC/PHY. KSZ9031 doesn't. Both KSZ9031 and KSZ9131 have per-lane skews. If I wanted the delay to be added by the PHY in the DT, I basically wouldn't be able to share the DT between both variants of our boards as the KSZ9131 would add a 2ns delay[2] based on phy-mode but not KSZ9031 where I assume I would need to use pad skews instead to add up to 1.9ns (TXC; 1.4ns for RXC) but then we would have ~4ns for KSZ9131 which is likely out of spec :/ Today it works by "chance" because our phy-mode is rgmii but the delay is added by the MAC (due to the long-standing mistake in the Rockchip GMAC driver). I have not tested any of the above claims. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/DataSheets/00002841D.pdf [2] Link [1] at 4.9.3 RGMII TIMING > It seems to me, rx_delay and tx_delay should be deprecated, set to 0, > and let the PHY do they delays. If i remember correctly, that is what > you ended up with for you board? > This is what we ended up doing for our adapter board indeed. I'm trying to figure out if there's a way to not break stuff by forcing the properties to 0 and deprecating the values. tx_delay is used (by the driver) whenever the delay is added by the PCB on TXC (based on phy-mode, which is incorrect). Same for rx_delay but for RXC. So if we set phy-mode correctly (as it's being pushed lately for Rockchip boards), tx_delay/rx_delay will need to be set to 0 explicitly whenever the delay is expected to be NOT implemented by the PCB (so in addition to the phy-mode saying "delay not added by the PCB"). If the properties are missing in the DT, they are assigned a default value. So we cannot simply remove them or expect them gone (we also want DT backward compatibility so cannot change the driver behavior :/). The only way forward that I see is forcing the properties to be 0 in device tree and warn if they are non-zero or something. Will trigger a bunch of warning/errors on existing Device Trees though. And this will not allow us to use two slightly different PHYs for our products without having two separate Device Trees. Cheers, Quentin
Am Freitag, 23. Mai 2025, 18:48:42 Mitteleuropäische Sommerzeit schrieb Quentin Schulz:
> From: Quentin Schulz <quentin.schulz@cherry.de>
>
> This adds support for the Ethernet Switch adapter connected through the
> mezzanine connector on RK3588 Jaguar.
>
> This adapter has a KSZ9896 Ethernet Switch with 4 1GbE Ethernet
> connectors, two user controllable LEDs, and an M12 12-pin connector
> which exposes the following signals:
> - RS232/RS485 (max 250Kbps/500Kbps, RX pin1, TX pin2)
> - two digital inputs (pin4 routed to GPIO3_C5 on SoC, pin5 to GPIO4_B4)
> - two digital outputs (pin7 routed to GPIO3_D3 on SoC, pin8 to
> GPIO3_D1)
> - two analog inputs (pin10 to channel1 of ADS1015, pin11 to channel2)
>
> Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
> +&i2c1 {
> + #address-cells = <1>;
> + /* ADS1015 can handle high-speed (HS) mode (up to 3.4MHz) on I2C bus,
> + but SOC can handle only up to 400kHz. */
Multi-line comment style would be
/*
* ADS1015 can handle high-speed (HS) mode (up to 3.4MHz) on I2C bus,
* but SOC can handle only up to 400kHz
*/
> + clock-frequency = <400000>;
> + #size-cells = <0>;
> + status = "okay";
> +
other than that, looks fine
Heiko
© 2016 - 2025 Red Hat, Inc.