drivers/iommu/mtk_iommu.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-)
Currently, mtk_iommu calls during probe iommu_device_register before
the hw_list from driver data is initialized. Since iommu probing issue
fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
hw_list is accessed with list_first_entry (not null safe).
So, change the call order to ensure iommu_device_register is called
after the driver data are initialized.
Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
Reviewed-by: Yong Wu <yong.wu@mediatek.com>
Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
---
This patch fixes a NULL pointer dereference that occurs during the
mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
```
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
Mem abort info:
ESR = 0x0000000096000004
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
FSC = 0x04: level 0 translation fault
Data abort info:
ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
CM = 0, WnR = 0, TnD = 0, TagAccess = 0
GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101380000
[0000000000000000] pgd=0000000000000000, p4d=0000000000000000
Internal error: Oops: 0000000096000004 [#1] SMP
Modules linked in: btusb btrtl mt6315_regulator btintel mtk_vcodec_dec
btmtk v4l2_vp9 btbcm mtk_vcodec_enc v4l2_h264 mtk_vcodec_dbgfs
bluetooth mtk_jpeg ecdh_generic mtk_vcodec_common mtk_jpeg_enc_hw
mtk_jpeg_dec_hw ecc v4l2_mem2mem rfkill videobuf2_dma_contig
videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc
mt6360_charger mcp251xfd it5205 goodix_ts can_dev panfrost
drm_shmem_helper gpu_sched lvts_thermal mtk_svs mtk_adsp_mailbox
snd_soc_dmic mtk_scp mtk_rpmsg mtk_scp_ipi mtk_cmdq_mailbox
mediatek_cpufreq_hw spmi_mtk_pmif fuse dm_mod ip_tables x_tables
ipv6 mediatek_drm tcpci_mt6360 drm_display_helper drm_client_lib
tcpci drm_dma_helper drm_kms_helper tcpm drm mtk_mutex mtk_mmsys
typec rtc_mt6397 mtk_cmdq_helper phy_mtk_pcie pcie_mediatek_gen3
dwmac_mediatek stmmac_platform stmmac pwm_mtk_disp pcs_xpcs pwm_bl
backlight
CPU: 5 UID: 0 PID: 12 Comm: kworker/u32:0 Not tainted 6.14.0-next-20250326 #4 PREEMPT
Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT)
Workqueue: events_unbound deferred_probe_work_func
pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : mtk_iommu_device_group+0x2c/0xe0
lr : __iommu_probe_device+0x130/0x490
sp : ffff8000827b3970
x29: ffff8000827b3970 x28: ffff0000c0028000 x27: ffff80008164f2b8
x26: ffff0000cbc47130 x25: ffff80008096a9f0 x24: ffff80008164f5a8
x23: ffff800082684b60 x22: ffff8000827b3a80 x21: ffff0000cbc47130
x20: ffffffffffffffed x19: ffff0000c117c010 x18: 0000000000000000
x17: 6f702d616d642d64 x16: 6574636972747365 x15: 0000000000000002
x14: 0000000000000000 x13: 0000000000128d55 x12: 6f632d616d642e30
x11: 0000000000000100 x10: 0000000000000001 x9 : 0000000000000220
x8 : 0101010101010101 x7 : ffff0000c117c010 x6 : 306c766f2c727461
x5 : 0000000000000000 x4 : ffff0000c10a2de8 x3 : ffff0000c10a2e70
x2 : ffff0000c01a4600 x1 : 0000000000000000 x0 : ffff0000c65470c0
Call trace:
mtk_iommu_device_group+0x2c/0xe0 (P)
__iommu_probe_device+0x130/0x490
probe_iommu_group+0x3c/0x70
bus_for_each_dev+0x7c/0xe0
iommu_device_register+0xd8/0x22c
mtk_iommu_probe+0x270/0x53c
platform_probe+0x68/0xd8
really_probe+0xbc/0x2c0
__driver_probe_device+0x78/0x120
driver_probe_device+0x3c/0x154
__device_attach_driver+0xb8/0x140
bus_for_each_drv+0x88/0xe8
__device_attach+0xa0/0x190
device_initial_probe+0x14/0x20
bus_probe_device+0xb4/0xc0
deferred_probe_work_func+0x90/0xc8
process_one_work+0x148/0x284
worker_thread+0x2cc/0x3cc
kthread+0x12c/0x204
ret_from_fork+0x10/0x20
Code: b4000500 f9401c01 92800254 f9409821 (f9400035)
---[ end trace 0000000000000000 ]---
```
I've tested this patch on Mediatek Genio 510-EVK and 1200-EVK boards
with a kernel based on linux-next (tag: next-20250327).
---
Changes in v2:
- Fix goto label usage in device registration error case
- Add review and test trailers
- Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com
---
drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 034b0e670384a24df10130cbbff95ce8e0bc092d..df98d0c65f5469c6803cd9d151c85ad855558cf5 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, data);
mutex_init(&data->mutex);
- ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
- "mtk-iommu.%pa", &ioaddr);
- if (ret)
- goto out_link_remove;
-
- ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
- if (ret)
- goto out_sysfs_remove;
-
if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
list_add_tail(&data->list, data->plat_data->hw_list);
data->hw_list = data->plat_data->hw_list;
@@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->hw_list = &data->hw_list_head;
}
+ ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+ "mtk-iommu.%pa", &ioaddr);
+ if (ret)
+ goto out_list_del;
+
+ ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+ if (ret)
+ goto out_sysfs_remove;
+
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
if (ret)
- goto out_list_del;
+ goto out_device_unregister;
}
return ret;
-out_list_del:
- list_del(&data->list);
+out_device_unregister:
iommu_device_unregister(&data->iommu);
out_sysfs_remove:
iommu_device_sysfs_remove(&data->iommu);
-out_link_remove:
+out_list_del:
+ list_del(&data->list);
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
device_link_remove(data->smicomm_dev, dev);
out_runtime_disable:
---
base-commit: bc960e3e32c8b940c10b14557271355f66ae4db1
change-id: 20250327-fix-mtk-iommu-error-af6ec347d057
Best regards,
--
Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
On Thu, Apr 03, 2025 at 12:22:12PM +0200, Louis-Alexis Eyraud wrote: > --- > Changes in v2: > - Fix goto label usage in device registration error case > - Add review and test trailers > - Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com > --- > drivers/iommu/mtk_iommu.c | 26 +++++++++++++------------- > 1 file changed, 13 insertions(+), 13 deletions(-) Replaced the previously applied patch with this one.
Hi,
On Thu, Apr 3, 2025 at 6:24 PM Louis-Alexis Eyraud
<louisalexis.eyraud@collabora.com> wrote:
>
> Currently, mtk_iommu calls during probe iommu_device_register before
> the hw_list from driver data is initialized. Since iommu probing issue
> fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
> hw_list is accessed with list_first_entry (not null safe).
>
> So, change the call order to ensure iommu_device_register is called
> after the driver data are initialized.
>
> Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
> Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
> Reviewed-by: Yong Wu <yong.wu@mediatek.com>
> Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> ---
> This patch fixes a NULL pointer dereference that occurs during the
> mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
> ```
> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
This is a reminder to please land this and send to Linus ASAP.
This fixes the v6.15-rc1 kernel on all the MediaTek Chromebook platforms,
except for MT8188, which seems to have another issue in iommu_get_dma_domain()
used from the DRM driver:
Disabling lock debugging due to kernel taint
Unable to handle kernel NULL pointer dereference at virtual
address 0000000000000158
Mem abort info:
ESR = 0x0000000096000005
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
FSC = 0x05: level 1 translation fault
Data abort info:
ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
CM = 0, WnR = 0, TnD = 0, TagAccess = 0
GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
user pgtable: 4k pages, 39-bit VAs, pgdp=00000001185ab000
[0000000000000158] pgd=0000000000000000, p4d=0000000000000000,
pud=0000000000000000
Internal error: Oops: 0000000096000005 [#1] SMP
Modules linked in: mtk_vcodec_dec_hw mtk_vcodec_dec mtk_vcodec_enc
v4l2_vp9 v4l2_h264 mtk_vcodec_dbgfs mtk_jpeg mtk_vcodec_common
cros_ec_sensors mtk_jpeg_enc_hw cros_ec_sensors_core mtk_jpeg_dec_hw
mtk_vpu v4l2_mem2mem videobuf2_v4l2 snd_sof_mt8186
videobuf2_dma_contig snd_sof_xtensa_dsp sha1_ce videobuf2_memops
mtk_adsp_common mtk_scp videobuf2_common snd_sof_of mtk_rpmsg snd_sof
rpmsg_core cros_ec_sensorhub hid_google_hammer hid_vivaldi_common
snd_sof_utils mtk_scp_ipi fuse
CPU: 6 UID: 0 PID: 12 Comm: kworker/u32:0 Tainted: G B
6.15.0-rc1-00001-gfed05d98b726 #628 PREEMPT
06e695da6360d22824958738f9ba1c9b2416be19
Tainted: [B]=BAD_PAGE
Hardware name: Google Ciri sku2 board (DT)
Workqueue: events_unbound deferred_probe_work_func
pstate: 40400009 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : iommu_get_dma_domain+0x30/0x58
lr : iommu_get_dma_domain+0x30/0x58
sp : ffffffc080186ee0
x29: ffffffc080186ee0 x28: ffffff80c4c59028 x27: 0000000000000000
x26: 0000000000000014 x25: 0068000000000f0b x24: ffffff80c4c5ca28
x23: 00000000008ca000 x22: 1ffffff810030df8 x21: ffffff80c20ea010
x20: ffffff80c20ea010 x19: 0000000000000000 x18: 0000000000000000
x17: 3d3d3d3d3d3d3d3d x16: 3d3d3d3d3d3d3d3d x15: 0720072007200720
x14: 0720072007200720 x13: ffffff80c0a1bb80 x12: ffffffbae2ca45e9
x11: 1ffffffae2ca45e8 x10: ffffffbae2ca45e8 x9 : dfffffc000000000
x8 : 000000451d35ba18 x7 : ffffffd716522f47 x6 : 0000000000000001
x5 : ffffffd716522f40 x4 : ffffffbae2ca45e9 x3 : ffffffd713717e6c
x2 : 0000000000000001 x1 : ffffff80c0941dc0 x0 : 0000000000000001
Call trace:
iommu_get_dma_domain+0x30/0x58 (P)
__iommu_dma_alloc_noncontiguous+0x34/0x498
iommu_dma_alloc+0x2f0/0x3e0
dma_alloc_attrs+0x1b4/0x3b8
mtk_gem_create+0x124/0x170
mtk_gem_dumb_create+0x84/0x180
drm_mode_create_dumb+0xf8/0x128
drm_client_framebuffer_create+0x11c/0x240
drm_fbdev_dma_driver_fbdev_probe+0x154/0x5a0
__drm_fb_helper_initial_config_and_unlock+0x4f0/0x928
drm_fb_helper_initial_config+0x50/0x68
drm_fbdev_client_hotplug+0xc0/0x120
drm_client_register+0xa0/0x100
drm_fbdev_client_setup+0xc8/0x260
drm_client_setup+0x60/0xd0
mtk_drm_bind+0x4f0/0xaf0
try_to_bring_up_aggregate_device+0x258/0x2f0
__component_add+0x104/0x240
component_add+0x1c/0x38
mtk_disp_rdma_probe+0x180/0x260
platform_probe+0x98/0x128
really_probe+0x118/0x3c0
__driver_probe_device+0xc0/0x198
driver_probe_device+0x64/0x1f8
__device_attach_driver+0xf0/0x1b0
bus_for_each_drv+0xf4/0x178
__device_attach+0x120/0x240
device_initial_probe+0x1c/0x30
bus_probe_device+0xdc/0xe8
deferred_probe_work_func+0xec/0x140
process_one_work+0x428/0xa80
worker_thread+0x2c0/0x538
kthread+0x258/0x380
ret_from_fork+0x10/0x20
Code: 97e8f12c f9422673 91056260 97e8f129 (f940ae60)
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Oops: Fatal exception
SMP: stopping secondary CPUs
Kernel Offset: 0x1693600000 from 0xffffffc080000000
PHYS_OFFSET: 0x40000000
CPU features: 0x0e00,000002e0,01202650,8200720b
Memory Limit: none
Rebooting in 30 seconds..
Thanks
ChenYu
> Mem abort info:
> ESR = 0x0000000096000004
> EC = 0x25: DABT (current EL), IL = 32 bits
> SET = 0, FnV = 0
> EA = 0, S1PTW = 0
> FSC = 0x04: level 0 translation fault
> Data abort info:
> ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101380000
> [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
> Internal error: Oops: 0000000096000004 [#1] SMP
> Modules linked in: btusb btrtl mt6315_regulator btintel mtk_vcodec_dec
> btmtk v4l2_vp9 btbcm mtk_vcodec_enc v4l2_h264 mtk_vcodec_dbgfs
> bluetooth mtk_jpeg ecdh_generic mtk_vcodec_common mtk_jpeg_enc_hw
> mtk_jpeg_dec_hw ecc v4l2_mem2mem rfkill videobuf2_dma_contig
> videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc
> mt6360_charger mcp251xfd it5205 goodix_ts can_dev panfrost
> drm_shmem_helper gpu_sched lvts_thermal mtk_svs mtk_adsp_mailbox
> snd_soc_dmic mtk_scp mtk_rpmsg mtk_scp_ipi mtk_cmdq_mailbox
> mediatek_cpufreq_hw spmi_mtk_pmif fuse dm_mod ip_tables x_tables
> ipv6 mediatek_drm tcpci_mt6360 drm_display_helper drm_client_lib
> tcpci drm_dma_helper drm_kms_helper tcpm drm mtk_mutex mtk_mmsys
> typec rtc_mt6397 mtk_cmdq_helper phy_mtk_pcie pcie_mediatek_gen3
> dwmac_mediatek stmmac_platform stmmac pwm_mtk_disp pcs_xpcs pwm_bl
> backlight
> CPU: 5 UID: 0 PID: 12 Comm: kworker/u32:0 Not tainted 6.14.0-next-20250326 #4 PREEMPT
> Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT)
> Workqueue: events_unbound deferred_probe_work_func
> pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : mtk_iommu_device_group+0x2c/0xe0
> lr : __iommu_probe_device+0x130/0x490
> sp : ffff8000827b3970
> x29: ffff8000827b3970 x28: ffff0000c0028000 x27: ffff80008164f2b8
> x26: ffff0000cbc47130 x25: ffff80008096a9f0 x24: ffff80008164f5a8
> x23: ffff800082684b60 x22: ffff8000827b3a80 x21: ffff0000cbc47130
> x20: ffffffffffffffed x19: ffff0000c117c010 x18: 0000000000000000
> x17: 6f702d616d642d64 x16: 6574636972747365 x15: 0000000000000002
> x14: 0000000000000000 x13: 0000000000128d55 x12: 6f632d616d642e30
> x11: 0000000000000100 x10: 0000000000000001 x9 : 0000000000000220
> x8 : 0101010101010101 x7 : ffff0000c117c010 x6 : 306c766f2c727461
> x5 : 0000000000000000 x4 : ffff0000c10a2de8 x3 : ffff0000c10a2e70
> x2 : ffff0000c01a4600 x1 : 0000000000000000 x0 : ffff0000c65470c0
> Call trace:
> mtk_iommu_device_group+0x2c/0xe0 (P)
> __iommu_probe_device+0x130/0x490
> probe_iommu_group+0x3c/0x70
> bus_for_each_dev+0x7c/0xe0
> iommu_device_register+0xd8/0x22c
> mtk_iommu_probe+0x270/0x53c
> platform_probe+0x68/0xd8
> really_probe+0xbc/0x2c0
> __driver_probe_device+0x78/0x120
> driver_probe_device+0x3c/0x154
> __device_attach_driver+0xb8/0x140
> bus_for_each_drv+0x88/0xe8
> __device_attach+0xa0/0x190
> device_initial_probe+0x14/0x20
> bus_probe_device+0xb4/0xc0
> deferred_probe_work_func+0x90/0xc8
> process_one_work+0x148/0x284
> worker_thread+0x2cc/0x3cc
> kthread+0x12c/0x204
> ret_from_fork+0x10/0x20
> Code: b4000500 f9401c01 92800254 f9409821 (f9400035)
> ---[ end trace 0000000000000000 ]---
> ```
>
> I've tested this patch on Mediatek Genio 510-EVK and 1200-EVK boards
> with a kernel based on linux-next (tag: next-20250327).
> ---
> Changes in v2:
> - Fix goto label usage in device registration error case
> - Add review and test trailers
> - Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com
> ---
> drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
> 1 file changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index 034b0e670384a24df10130cbbff95ce8e0bc092d..df98d0c65f5469c6803cd9d151c85ad855558cf5 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> platform_set_drvdata(pdev, data);
> mutex_init(&data->mutex);
>
> - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> - "mtk-iommu.%pa", &ioaddr);
> - if (ret)
> - goto out_link_remove;
> -
> - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> - if (ret)
> - goto out_sysfs_remove;
> -
> if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
> list_add_tail(&data->list, data->plat_data->hw_list);
> data->hw_list = data->plat_data->hw_list;
> @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> data->hw_list = &data->hw_list_head;
> }
>
> + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> + "mtk-iommu.%pa", &ioaddr);
> + if (ret)
> + goto out_list_del;
> +
> + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> + if (ret)
> + goto out_sysfs_remove;
> +
> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
> ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
> if (ret)
> - goto out_list_del;
> + goto out_device_unregister;
> }
> return ret;
>
> -out_list_del:
> - list_del(&data->list);
> +out_device_unregister:
> iommu_device_unregister(&data->iommu);
> out_sysfs_remove:
> iommu_device_sysfs_remove(&data->iommu);
> -out_link_remove:
> +out_list_del:
> + list_del(&data->list);
> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
> device_link_remove(data->smicomm_dev, dev);
> out_runtime_disable:
>
> ---
> base-commit: bc960e3e32c8b940c10b14557271355f66ae4db1
> change-id: 20250327-fix-mtk-iommu-error-af6ec347d057
>
> Best regards,
> --
> Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
>
On 2025-04-07 6:17 am, Chen-Yu Tsai wrote:
> Hi,
>
> On Thu, Apr 3, 2025 at 6:24 PM Louis-Alexis Eyraud
> <louisalexis.eyraud@collabora.com> wrote:
>>
>> Currently, mtk_iommu calls during probe iommu_device_register before
>> the hw_list from driver data is initialized. Since iommu probing issue
>> fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
>> hw_list is accessed with list_first_entry (not null safe).
>>
>> So, change the call order to ensure iommu_device_register is called
>> after the driver data are initialized.
>>
>> Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
>> Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
>> Reviewed-by: Yong Wu <yong.wu@mediatek.com>
>> Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
>> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>> Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
>> ---
>> This patch fixes a NULL pointer dereference that occurs during the
>> mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
>> ```
>> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
>
> This is a reminder to please land this and send to Linus ASAP.
>
> This fixes the v6.15-rc1 kernel on all the MediaTek Chromebook platforms,
> except for MT8188, which seems to have another issue in iommu_get_dma_domain()
> used from the DRM driver:
>
> Disabling lock debugging due to kernel taint
> Unable to handle kernel NULL pointer dereference at virtual
> address 0000000000000158
From the offset and the stacktrace code dump, this would appear to be
the dereference of dev->iommu_group->default_domain, when
dev->iommu_group is NULL (and CONFIG_DEBUG_LOCK_ALLOC makes the mutex
really big). Which is a bit weird, as to get into iommu-dma at all in
that state would suggest that whatever device this is has been removed
and had its group torn down again after iommu_setup_dma_ops() has run...
but either way that implies the DRM driver is passing an arbitrary
device to the DMA API without making sure it's actualy valid.
Trying to trace the provenance of dma_dev from mtk_gem_create() back
through the rest of the driver is quite the rabbit-hole, but it seems
like in at least one case it can lead back to an
of_find_device_by_node() in ovl_adaptor_comp_init(), which definitely
looks sufficiently sketchy.
Thanks,
Robin.
> Mem abort info:
> ESR = 0x0000000096000005
> EC = 0x25: DABT (current EL), IL = 32 bits
> SET = 0, FnV = 0
> EA = 0, S1PTW = 0
> FSC = 0x05: level 1 translation fault
> Data abort info:
> ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> user pgtable: 4k pages, 39-bit VAs, pgdp=00000001185ab000
> [0000000000000158] pgd=0000000000000000, p4d=0000000000000000,
> pud=0000000000000000
> Internal error: Oops: 0000000096000005 [#1] SMP
> Modules linked in: mtk_vcodec_dec_hw mtk_vcodec_dec mtk_vcodec_enc
> v4l2_vp9 v4l2_h264 mtk_vcodec_dbgfs mtk_jpeg mtk_vcodec_common
> cros_ec_sensors mtk_jpeg_enc_hw cros_ec_sensors_core mtk_jpeg_dec_hw
> mtk_vpu v4l2_mem2mem videobuf2_v4l2 snd_sof_mt8186
> videobuf2_dma_contig snd_sof_xtensa_dsp sha1_ce videobuf2_memops
> mtk_adsp_common mtk_scp videobuf2_common snd_sof_of mtk_rpmsg snd_sof
> rpmsg_core cros_ec_sensorhub hid_google_hammer hid_vivaldi_common
> snd_sof_utils mtk_scp_ipi fuse
> CPU: 6 UID: 0 PID: 12 Comm: kworker/u32:0 Tainted: G B
> 6.15.0-rc1-00001-gfed05d98b726 #628 PREEMPT
> 06e695da6360d22824958738f9ba1c9b2416be19
> Tainted: [B]=BAD_PAGE
> Hardware name: Google Ciri sku2 board (DT)
> Workqueue: events_unbound deferred_probe_work_func
> pstate: 40400009 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : iommu_get_dma_domain+0x30/0x58
> lr : iommu_get_dma_domain+0x30/0x58
> sp : ffffffc080186ee0
> x29: ffffffc080186ee0 x28: ffffff80c4c59028 x27: 0000000000000000
> x26: 0000000000000014 x25: 0068000000000f0b x24: ffffff80c4c5ca28
> x23: 00000000008ca000 x22: 1ffffff810030df8 x21: ffffff80c20ea010
> x20: ffffff80c20ea010 x19: 0000000000000000 x18: 0000000000000000
> x17: 3d3d3d3d3d3d3d3d x16: 3d3d3d3d3d3d3d3d x15: 0720072007200720
> x14: 0720072007200720 x13: ffffff80c0a1bb80 x12: ffffffbae2ca45e9
> x11: 1ffffffae2ca45e8 x10: ffffffbae2ca45e8 x9 : dfffffc000000000
> x8 : 000000451d35ba18 x7 : ffffffd716522f47 x6 : 0000000000000001
> x5 : ffffffd716522f40 x4 : ffffffbae2ca45e9 x3 : ffffffd713717e6c
> x2 : 0000000000000001 x1 : ffffff80c0941dc0 x0 : 0000000000000001
> Call trace:
> iommu_get_dma_domain+0x30/0x58 (P)
> __iommu_dma_alloc_noncontiguous+0x34/0x498
> iommu_dma_alloc+0x2f0/0x3e0
> dma_alloc_attrs+0x1b4/0x3b8
> mtk_gem_create+0x124/0x170
> mtk_gem_dumb_create+0x84/0x180
> drm_mode_create_dumb+0xf8/0x128
> drm_client_framebuffer_create+0x11c/0x240
> drm_fbdev_dma_driver_fbdev_probe+0x154/0x5a0
> __drm_fb_helper_initial_config_and_unlock+0x4f0/0x928
> drm_fb_helper_initial_config+0x50/0x68
> drm_fbdev_client_hotplug+0xc0/0x120
> drm_client_register+0xa0/0x100
> drm_fbdev_client_setup+0xc8/0x260
> drm_client_setup+0x60/0xd0
> mtk_drm_bind+0x4f0/0xaf0
> try_to_bring_up_aggregate_device+0x258/0x2f0
> __component_add+0x104/0x240
> component_add+0x1c/0x38
> mtk_disp_rdma_probe+0x180/0x260
> platform_probe+0x98/0x128
> really_probe+0x118/0x3c0
> __driver_probe_device+0xc0/0x198
> driver_probe_device+0x64/0x1f8
> __device_attach_driver+0xf0/0x1b0
> bus_for_each_drv+0xf4/0x178
> __device_attach+0x120/0x240
> device_initial_probe+0x1c/0x30
> bus_probe_device+0xdc/0xe8
> deferred_probe_work_func+0xec/0x140
> process_one_work+0x428/0xa80
> worker_thread+0x2c0/0x538
> kthread+0x258/0x380
> ret_from_fork+0x10/0x20
> Code: 97e8f12c f9422673 91056260 97e8f129 (f940ae60)
> ---[ end trace 0000000000000000 ]---
> Kernel panic - not syncing: Oops: Fatal exception
> SMP: stopping secondary CPUs
> Kernel Offset: 0x1693600000 from 0xffffffc080000000
> PHYS_OFFSET: 0x40000000
> CPU features: 0x0e00,000002e0,01202650,8200720b
> Memory Limit: none
> Rebooting in 30 seconds..
>
>
>
> Thanks
> ChenYu
>
>> Mem abort info:
>> ESR = 0x0000000096000004
>> EC = 0x25: DABT (current EL), IL = 32 bits
>> SET = 0, FnV = 0
>> EA = 0, S1PTW = 0
>> FSC = 0x04: level 0 translation fault
>> Data abort info:
>> ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
>> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
>> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
>> user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101380000
>> [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
>> Internal error: Oops: 0000000096000004 [#1] SMP
>> Modules linked in: btusb btrtl mt6315_regulator btintel mtk_vcodec_dec
>> btmtk v4l2_vp9 btbcm mtk_vcodec_enc v4l2_h264 mtk_vcodec_dbgfs
>> bluetooth mtk_jpeg ecdh_generic mtk_vcodec_common mtk_jpeg_enc_hw
>> mtk_jpeg_dec_hw ecc v4l2_mem2mem rfkill videobuf2_dma_contig
>> videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc
>> mt6360_charger mcp251xfd it5205 goodix_ts can_dev panfrost
>> drm_shmem_helper gpu_sched lvts_thermal mtk_svs mtk_adsp_mailbox
>> snd_soc_dmic mtk_scp mtk_rpmsg mtk_scp_ipi mtk_cmdq_mailbox
>> mediatek_cpufreq_hw spmi_mtk_pmif fuse dm_mod ip_tables x_tables
>> ipv6 mediatek_drm tcpci_mt6360 drm_display_helper drm_client_lib
>> tcpci drm_dma_helper drm_kms_helper tcpm drm mtk_mutex mtk_mmsys
>> typec rtc_mt6397 mtk_cmdq_helper phy_mtk_pcie pcie_mediatek_gen3
>> dwmac_mediatek stmmac_platform stmmac pwm_mtk_disp pcs_xpcs pwm_bl
>> backlight
>> CPU: 5 UID: 0 PID: 12 Comm: kworker/u32:0 Not tainted 6.14.0-next-20250326 #4 PREEMPT
>> Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT)
>> Workqueue: events_unbound deferred_probe_work_func
>> pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>> pc : mtk_iommu_device_group+0x2c/0xe0
>> lr : __iommu_probe_device+0x130/0x490
>> sp : ffff8000827b3970
>> x29: ffff8000827b3970 x28: ffff0000c0028000 x27: ffff80008164f2b8
>> x26: ffff0000cbc47130 x25: ffff80008096a9f0 x24: ffff80008164f5a8
>> x23: ffff800082684b60 x22: ffff8000827b3a80 x21: ffff0000cbc47130
>> x20: ffffffffffffffed x19: ffff0000c117c010 x18: 0000000000000000
>> x17: 6f702d616d642d64 x16: 6574636972747365 x15: 0000000000000002
>> x14: 0000000000000000 x13: 0000000000128d55 x12: 6f632d616d642e30
>> x11: 0000000000000100 x10: 0000000000000001 x9 : 0000000000000220
>> x8 : 0101010101010101 x7 : ffff0000c117c010 x6 : 306c766f2c727461
>> x5 : 0000000000000000 x4 : ffff0000c10a2de8 x3 : ffff0000c10a2e70
>> x2 : ffff0000c01a4600 x1 : 0000000000000000 x0 : ffff0000c65470c0
>> Call trace:
>> mtk_iommu_device_group+0x2c/0xe0 (P)
>> __iommu_probe_device+0x130/0x490
>> probe_iommu_group+0x3c/0x70
>> bus_for_each_dev+0x7c/0xe0
>> iommu_device_register+0xd8/0x22c
>> mtk_iommu_probe+0x270/0x53c
>> platform_probe+0x68/0xd8
>> really_probe+0xbc/0x2c0
>> __driver_probe_device+0x78/0x120
>> driver_probe_device+0x3c/0x154
>> __device_attach_driver+0xb8/0x140
>> bus_for_each_drv+0x88/0xe8
>> __device_attach+0xa0/0x190
>> device_initial_probe+0x14/0x20
>> bus_probe_device+0xb4/0xc0
>> deferred_probe_work_func+0x90/0xc8
>> process_one_work+0x148/0x284
>> worker_thread+0x2cc/0x3cc
>> kthread+0x12c/0x204
>> ret_from_fork+0x10/0x20
>> Code: b4000500 f9401c01 92800254 f9409821 (f9400035)
>> ---[ end trace 0000000000000000 ]---
>> ```
>>
>> I've tested this patch on Mediatek Genio 510-EVK and 1200-EVK boards
>> with a kernel based on linux-next (tag: next-20250327).
>> ---
>> Changes in v2:
>> - Fix goto label usage in device registration error case
>> - Add review and test trailers
>> - Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com
>> ---
>> drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
>> 1 file changed, 13 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
>> index 034b0e670384a24df10130cbbff95ce8e0bc092d..df98d0c65f5469c6803cd9d151c85ad855558cf5 100644
>> --- a/drivers/iommu/mtk_iommu.c
>> +++ b/drivers/iommu/mtk_iommu.c
>> @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
>> platform_set_drvdata(pdev, data);
>> mutex_init(&data->mutex);
>>
>> - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
>> - "mtk-iommu.%pa", &ioaddr);
>> - if (ret)
>> - goto out_link_remove;
>> -
>> - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
>> - if (ret)
>> - goto out_sysfs_remove;
>> -
>> if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
>> list_add_tail(&data->list, data->plat_data->hw_list);
>> data->hw_list = data->plat_data->hw_list;
>> @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
>> data->hw_list = &data->hw_list_head;
>> }
>>
>> + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
>> + "mtk-iommu.%pa", &ioaddr);
>> + if (ret)
>> + goto out_list_del;
>> +
>> + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
>> + if (ret)
>> + goto out_sysfs_remove;
>> +
>> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
>> ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
>> if (ret)
>> - goto out_list_del;
>> + goto out_device_unregister;
>> }
>> return ret;
>>
>> -out_list_del:
>> - list_del(&data->list);
>> +out_device_unregister:
>> iommu_device_unregister(&data->iommu);
>> out_sysfs_remove:
>> iommu_device_sysfs_remove(&data->iommu);
>> -out_link_remove:
>> +out_list_del:
>> + list_del(&data->list);
>> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
>> device_link_remove(data->smicomm_dev, dev);
>> out_runtime_disable:
>>
>> ---
>> base-commit: bc960e3e32c8b940c10b14557271355f66ae4db1
>> change-id: 20250327-fix-mtk-iommu-error-af6ec347d057
>>
>> Best regards,
>> --
>> Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
>>
On Mon, Apr 7, 2025 at 8:38 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2025-04-07 6:17 am, Chen-Yu Tsai wrote:
> > Hi,
> >
> > On Thu, Apr 3, 2025 at 6:24 PM Louis-Alexis Eyraud
> > <louisalexis.eyraud@collabora.com> wrote:
> >>
> >> Currently, mtk_iommu calls during probe iommu_device_register before
> >> the hw_list from driver data is initialized. Since iommu probing issue
> >> fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
> >> hw_list is accessed with list_first_entry (not null safe).
> >>
> >> So, change the call order to ensure iommu_device_register is called
> >> after the driver data are initialized.
> >>
> >> Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
> >> Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
> >> Reviewed-by: Yong Wu <yong.wu@mediatek.com>
> >> Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
> >> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> >> Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> >> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> >> ---
> >> This patch fixes a NULL pointer dereference that occurs during the
> >> mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
> >> ```
> >> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
> >
> > This is a reminder to please land this and send to Linus ASAP.
> >
> > This fixes the v6.15-rc1 kernel on all the MediaTek Chromebook platforms,
> > except for MT8188, which seems to have another issue in iommu_get_dma_domain()
> > used from the DRM driver:
> >
> > Disabling lock debugging due to kernel taint
> > Unable to handle kernel NULL pointer dereference at virtual
> > address 0000000000000158
>
> From the offset and the stacktrace code dump, this would appear to be
> the dereference of dev->iommu_group->default_domain, when
> dev->iommu_group is NULL (and CONFIG_DEBUG_LOCK_ALLOC makes the mutex
> really big). Which is a bit weird, as to get into iommu-dma at all in
> that state would suggest that whatever device this is has been removed
> and had its group torn down again after iommu_setup_dma_ops() has run...
> but either way that implies the DRM driver is passing an arbitrary
> device to the DMA API without making sure it's actualy valid.
This is indeed what has happened during the IOMMU driver probe,
in iommu_device_register():
platform 1a020000.video-encoder: Adding to iommu group 0
platform 1a030000.jpeg-encoder: Adding to iommu group 0
platform 1a040000.jpeg-decoder: Adding to iommu group 0
platform 1c000000.ovl: Adding to iommu group 1
platform 1c002000.rdma: larb device not found
platform 1c002000.rdma: error -EINVAL: IOMMU /soc/iommu@1c028000
probe_device() failed
platform 1c002000.rdma: error -EINVAL: iommu_init_device() failed
platform 1a020000.video-encoder: iommu_deinit_device()
platform 1a030000.jpeg-encoder: iommu_deinit_device()
platform 1a040000.jpeg-decoder: iommu_deinit_device()
platform 1c000000.ovl: iommu_deinit_device()
mtk-iommu 1c028000.iommu: error -EINVAL: Failed to register IOMMU
mtk-iommu 1c028000.iommu: probe with driver mtk-iommu failed with error -22
So 4 devices successfully get attached to the IOMMU, but then the fifth
one fails, so iommu_device_register() calls iommu_device_unregister(),
which tears down the group for all the devices.
I think we need to undo iommu_setup_dma_ops() in __iommu_group_remove_device(),
which is indirectly called from iommu_device_unregister(). That avoids
carrying over stale IOMMU related settings to a retry which then no
longer has the IOMMU available.
Does that make sense?
Thanks
ChenYu
> Trying to trace the provenance of dma_dev from mtk_gem_create() back
> through the rest of the driver is quite the rabbit-hole, but it seems
> like in at least one case it can lead back to an
> of_find_device_by_node() in ovl_adaptor_comp_init(), which definitely
> looks sufficiently sketchy.
>
> Thanks,
> Robin.
>
> > Mem abort info:
> > ESR = 0x0000000096000005
> > EC = 0x25: DABT (current EL), IL = 32 bits
> > SET = 0, FnV = 0
> > EA = 0, S1PTW = 0
> > FSC = 0x05: level 1 translation fault
> > Data abort info:
> > ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
> > CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> > GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > user pgtable: 4k pages, 39-bit VAs, pgdp=00000001185ab000
> > [0000000000000158] pgd=0000000000000000, p4d=0000000000000000,
> > pud=0000000000000000
> > Internal error: Oops: 0000000096000005 [#1] SMP
> > Modules linked in: mtk_vcodec_dec_hw mtk_vcodec_dec mtk_vcodec_enc
> > v4l2_vp9 v4l2_h264 mtk_vcodec_dbgfs mtk_jpeg mtk_vcodec_common
> > cros_ec_sensors mtk_jpeg_enc_hw cros_ec_sensors_core mtk_jpeg_dec_hw
> > mtk_vpu v4l2_mem2mem videobuf2_v4l2 snd_sof_mt8186
> > videobuf2_dma_contig snd_sof_xtensa_dsp sha1_ce videobuf2_memops
> > mtk_adsp_common mtk_scp videobuf2_common snd_sof_of mtk_rpmsg snd_sof
> > rpmsg_core cros_ec_sensorhub hid_google_hammer hid_vivaldi_common
> > snd_sof_utils mtk_scp_ipi fuse
> > CPU: 6 UID: 0 PID: 12 Comm: kworker/u32:0 Tainted: G B
> > 6.15.0-rc1-00001-gfed05d98b726 #628 PREEMPT
> > 06e695da6360d22824958738f9ba1c9b2416be19
> > Tainted: [B]=BAD_PAGE
> > Hardware name: Google Ciri sku2 board (DT)
> > Workqueue: events_unbound deferred_probe_work_func
> > pstate: 40400009 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > pc : iommu_get_dma_domain+0x30/0x58
> > lr : iommu_get_dma_domain+0x30/0x58
> > sp : ffffffc080186ee0
> > x29: ffffffc080186ee0 x28: ffffff80c4c59028 x27: 0000000000000000
> > x26: 0000000000000014 x25: 0068000000000f0b x24: ffffff80c4c5ca28
> > x23: 00000000008ca000 x22: 1ffffff810030df8 x21: ffffff80c20ea010
> > x20: ffffff80c20ea010 x19: 0000000000000000 x18: 0000000000000000
> > x17: 3d3d3d3d3d3d3d3d x16: 3d3d3d3d3d3d3d3d x15: 0720072007200720
> > x14: 0720072007200720 x13: ffffff80c0a1bb80 x12: ffffffbae2ca45e9
> > x11: 1ffffffae2ca45e8 x10: ffffffbae2ca45e8 x9 : dfffffc000000000
> > x8 : 000000451d35ba18 x7 : ffffffd716522f47 x6 : 0000000000000001
> > x5 : ffffffd716522f40 x4 : ffffffbae2ca45e9 x3 : ffffffd713717e6c
> > x2 : 0000000000000001 x1 : ffffff80c0941dc0 x0 : 0000000000000001
> > Call trace:
> > iommu_get_dma_domain+0x30/0x58 (P)
> > __iommu_dma_alloc_noncontiguous+0x34/0x498
> > iommu_dma_alloc+0x2f0/0x3e0
> > dma_alloc_attrs+0x1b4/0x3b8
> > mtk_gem_create+0x124/0x170
> > mtk_gem_dumb_create+0x84/0x180
> > drm_mode_create_dumb+0xf8/0x128
> > drm_client_framebuffer_create+0x11c/0x240
> > drm_fbdev_dma_driver_fbdev_probe+0x154/0x5a0
> > __drm_fb_helper_initial_config_and_unlock+0x4f0/0x928
> > drm_fb_helper_initial_config+0x50/0x68
> > drm_fbdev_client_hotplug+0xc0/0x120
> > drm_client_register+0xa0/0x100
> > drm_fbdev_client_setup+0xc8/0x260
> > drm_client_setup+0x60/0xd0
> > mtk_drm_bind+0x4f0/0xaf0
> > try_to_bring_up_aggregate_device+0x258/0x2f0
> > __component_add+0x104/0x240
> > component_add+0x1c/0x38
> > mtk_disp_rdma_probe+0x180/0x260
> > platform_probe+0x98/0x128
> > really_probe+0x118/0x3c0
> > __driver_probe_device+0xc0/0x198
> > driver_probe_device+0x64/0x1f8
> > __device_attach_driver+0xf0/0x1b0
> > bus_for_each_drv+0xf4/0x178
> > __device_attach+0x120/0x240
> > device_initial_probe+0x1c/0x30
> > bus_probe_device+0xdc/0xe8
> > deferred_probe_work_func+0xec/0x140
> > process_one_work+0x428/0xa80
> > worker_thread+0x2c0/0x538
> > kthread+0x258/0x380
> > ret_from_fork+0x10/0x20
> > Code: 97e8f12c f9422673 91056260 97e8f129 (f940ae60)
> > ---[ end trace 0000000000000000 ]---
> > Kernel panic - not syncing: Oops: Fatal exception
> > SMP: stopping secondary CPUs
> > Kernel Offset: 0x1693600000 from 0xffffffc080000000
> > PHYS_OFFSET: 0x40000000
> > CPU features: 0x0e00,000002e0,01202650,8200720b
> > Memory Limit: none
> > Rebooting in 30 seconds..
> >
> >
> >
> > Thanks
> > ChenYu
> >
> >> Mem abort info:
> >> ESR = 0x0000000096000004
> >> EC = 0x25: DABT (current EL), IL = 32 bits
> >> SET = 0, FnV = 0
> >> EA = 0, S1PTW = 0
> >> FSC = 0x04: level 0 translation fault
> >> Data abort info:
> >> ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
> >> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> >> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> >> user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101380000
> >> [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
> >> Internal error: Oops: 0000000096000004 [#1] SMP
> >> Modules linked in: btusb btrtl mt6315_regulator btintel mtk_vcodec_dec
> >> btmtk v4l2_vp9 btbcm mtk_vcodec_enc v4l2_h264 mtk_vcodec_dbgfs
> >> bluetooth mtk_jpeg ecdh_generic mtk_vcodec_common mtk_jpeg_enc_hw
> >> mtk_jpeg_dec_hw ecc v4l2_mem2mem rfkill videobuf2_dma_contig
> >> videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc
> >> mt6360_charger mcp251xfd it5205 goodix_ts can_dev panfrost
> >> drm_shmem_helper gpu_sched lvts_thermal mtk_svs mtk_adsp_mailbox
> >> snd_soc_dmic mtk_scp mtk_rpmsg mtk_scp_ipi mtk_cmdq_mailbox
> >> mediatek_cpufreq_hw spmi_mtk_pmif fuse dm_mod ip_tables x_tables
> >> ipv6 mediatek_drm tcpci_mt6360 drm_display_helper drm_client_lib
> >> tcpci drm_dma_helper drm_kms_helper tcpm drm mtk_mutex mtk_mmsys
> >> typec rtc_mt6397 mtk_cmdq_helper phy_mtk_pcie pcie_mediatek_gen3
> >> dwmac_mediatek stmmac_platform stmmac pwm_mtk_disp pcs_xpcs pwm_bl
> >> backlight
> >> CPU: 5 UID: 0 PID: 12 Comm: kworker/u32:0 Not tainted 6.14.0-next-20250326 #4 PREEMPT
> >> Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT)
> >> Workqueue: events_unbound deferred_probe_work_func
> >> pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> >> pc : mtk_iommu_device_group+0x2c/0xe0
> >> lr : __iommu_probe_device+0x130/0x490
> >> sp : ffff8000827b3970
> >> x29: ffff8000827b3970 x28: ffff0000c0028000 x27: ffff80008164f2b8
> >> x26: ffff0000cbc47130 x25: ffff80008096a9f0 x24: ffff80008164f5a8
> >> x23: ffff800082684b60 x22: ffff8000827b3a80 x21: ffff0000cbc47130
> >> x20: ffffffffffffffed x19: ffff0000c117c010 x18: 0000000000000000
> >> x17: 6f702d616d642d64 x16: 6574636972747365 x15: 0000000000000002
> >> x14: 0000000000000000 x13: 0000000000128d55 x12: 6f632d616d642e30
> >> x11: 0000000000000100 x10: 0000000000000001 x9 : 0000000000000220
> >> x8 : 0101010101010101 x7 : ffff0000c117c010 x6 : 306c766f2c727461
> >> x5 : 0000000000000000 x4 : ffff0000c10a2de8 x3 : ffff0000c10a2e70
> >> x2 : ffff0000c01a4600 x1 : 0000000000000000 x0 : ffff0000c65470c0
> >> Call trace:
> >> mtk_iommu_device_group+0x2c/0xe0 (P)
> >> __iommu_probe_device+0x130/0x490
> >> probe_iommu_group+0x3c/0x70
> >> bus_for_each_dev+0x7c/0xe0
> >> iommu_device_register+0xd8/0x22c
> >> mtk_iommu_probe+0x270/0x53c
> >> platform_probe+0x68/0xd8
> >> really_probe+0xbc/0x2c0
> >> __driver_probe_device+0x78/0x120
> >> driver_probe_device+0x3c/0x154
> >> __device_attach_driver+0xb8/0x140
> >> bus_for_each_drv+0x88/0xe8
> >> __device_attach+0xa0/0x190
> >> device_initial_probe+0x14/0x20
> >> bus_probe_device+0xb4/0xc0
> >> deferred_probe_work_func+0x90/0xc8
> >> process_one_work+0x148/0x284
> >> worker_thread+0x2cc/0x3cc
> >> kthread+0x12c/0x204
> >> ret_from_fork+0x10/0x20
> >> Code: b4000500 f9401c01 92800254 f9409821 (f9400035)
> >> ---[ end trace 0000000000000000 ]---
> >> ```
> >>
> >> I've tested this patch on Mediatek Genio 510-EVK and 1200-EVK boards
> >> with a kernel based on linux-next (tag: next-20250327).
> >> ---
> >> Changes in v2:
> >> - Fix goto label usage in device registration error case
> >> - Add review and test trailers
> >> - Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com
> >> ---
> >> drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
> >> 1 file changed, 13 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> >> index 034b0e670384a24df10130cbbff95ce8e0bc092d..df98d0c65f5469c6803cd9d151c85ad855558cf5 100644
> >> --- a/drivers/iommu/mtk_iommu.c
> >> +++ b/drivers/iommu/mtk_iommu.c
> >> @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> >> platform_set_drvdata(pdev, data);
> >> mutex_init(&data->mutex);
> >>
> >> - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> >> - "mtk-iommu.%pa", &ioaddr);
> >> - if (ret)
> >> - goto out_link_remove;
> >> -
> >> - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> >> - if (ret)
> >> - goto out_sysfs_remove;
> >> -
> >> if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
> >> list_add_tail(&data->list, data->plat_data->hw_list);
> >> data->hw_list = data->plat_data->hw_list;
> >> @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> >> data->hw_list = &data->hw_list_head;
> >> }
> >>
> >> + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> >> + "mtk-iommu.%pa", &ioaddr);
> >> + if (ret)
> >> + goto out_list_del;
> >> +
> >> + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> >> + if (ret)
> >> + goto out_sysfs_remove;
> >> +
> >> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
> >> ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
> >> if (ret)
> >> - goto out_list_del;
> >> + goto out_device_unregister;
> >> }
> >> return ret;
> >>
> >> -out_list_del:
> >> - list_del(&data->list);
> >> +out_device_unregister:
> >> iommu_device_unregister(&data->iommu);
> >> out_sysfs_remove:
> >> iommu_device_sysfs_remove(&data->iommu);
> >> -out_link_remove:
> >> +out_list_del:
> >> + list_del(&data->list);
> >> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
> >> device_link_remove(data->smicomm_dev, dev);
> >> out_runtime_disable:
> >>
> >> ---
> >> base-commit: bc960e3e32c8b940c10b14557271355f66ae4db1
> >> change-id: 20250327-fix-mtk-iommu-error-af6ec347d057
> >>
> >> Best regards,
> >> --
> >> Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> >>
>
On Mon, Apr 7, 2025 at 8:38 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2025-04-07 6:17 am, Chen-Yu Tsai wrote:
> > Hi,
> >
> > On Thu, Apr 3, 2025 at 6:24 PM Louis-Alexis Eyraud
> > <louisalexis.eyraud@collabora.com> wrote:
> >>
> >> Currently, mtk_iommu calls during probe iommu_device_register before
> >> the hw_list from driver data is initialized. Since iommu probing issue
> >> fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
> >> hw_list is accessed with list_first_entry (not null safe).
> >>
> >> So, change the call order to ensure iommu_device_register is called
> >> after the driver data are initialized.
> >>
> >> Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
> >> Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
> >> Reviewed-by: Yong Wu <yong.wu@mediatek.com>
> >> Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
> >> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> >> Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> >> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> >> ---
> >> This patch fixes a NULL pointer dereference that occurs during the
> >> mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
> >> ```
> >> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
> >
> > This is a reminder to please land this and send to Linus ASAP.
> >
> > This fixes the v6.15-rc1 kernel on all the MediaTek Chromebook platforms,
> > except for MT8188, which seems to have another issue in iommu_get_dma_domain()
> > used from the DRM driver:
> >
> > Disabling lock debugging due to kernel taint
> > Unable to handle kernel NULL pointer dereference at virtual
> > address 0000000000000158
>
> From the offset and the stacktrace code dump, this would appear to be
> the dereference of dev->iommu_group->default_domain, when
> dev->iommu_group is NULL (and CONFIG_DEBUG_LOCK_ALLOC makes the mutex
> really big). Which is a bit weird, as to get into iommu-dma at all in
> that state would suggest that whatever device this is has been removed
> and had its group torn down again after iommu_setup_dma_ops() has run...
> but either way that implies the DRM driver is passing an arbitrary
> device to the DMA API without making sure it's actualy valid.
>
> Trying to trace the provenance of dma_dev from mtk_gem_create() back
> through the rest of the driver is quite the rabbit-hole, but it seems
> like in at least one case it can lead back to an
> of_find_device_by_node() in ovl_adaptor_comp_init(), which definitely
> looks sufficiently sketchy.
It kind of makes sense since the "display controller" is composed of
many individual hardware blocks. The struct device tied to the DRM
driver is more or less just a place holder. Only the first block,
either the OVL (overlay compositing engine) or RDMA (scanout engine)
accesses memory, so I think it makes sense to use that as the dma_dev.
With some more logs, I did find something else fishy. Here the IOMMU
for the second display pipeline fails to probe:
mtk-iommu 1c028000.iommu: error -EINVAL: Failed to register IOMMU
mtk-iommu 1c028000.iommu: probe with driver mtk-iommu failed with error -22
Then later on, deferred probe times out, and the display pipeline is
brought up regardless:
mediatek-disp-ovl 1c000000.ovl: deferred probe timeout, ignoring dependency
mediatek-disp-ovl 1c000000.ovl: Adding to IOMMU failed: -110
mediatek-disp-rdma 1c002000.rdma: deferred probe timeout, ignoring
dependency
mediatek-disp-rdma 1c002000.rdma: Adding to IOMMU failed: -110
(repeats for all the individual components of the display pipeline)
mediatek-drm mediatek-drm.16.auto: bound 1c000000.ovl (ops
mtk_disp_ovl_component_ops)
mediatek-drm mediatek-drm.16.auto: bound 1c002000.rdma (ops
mtk_disp_rdma_component_ops)
(repeats for all the individual components of the display pipeline)
mediatek-drm mediatek-drm.16.auto: DMA device is 1c000000.ovl
[drm] Initialized mediatek 1.0.0 for mediatek-drm.16.auto on minor 1
And all without a functional IOMMU.
So I think this brings up two more questions:
1. Why is the IOMMU failing to probe?
2. Why is the core code still going the IOMMU DMA alloc path if there
is no usable IOMMU?
I'll look into the first question first. Insights welcome for the second
one.
Thanks
ChenYu
> Thanks,
> Robin.
>
> > Mem abort info:
> > ESR = 0x0000000096000005
> > EC = 0x25: DABT (current EL), IL = 32 bits
> > SET = 0, FnV = 0
> > EA = 0, S1PTW = 0
> > FSC = 0x05: level 1 translation fault
> > Data abort info:
> > ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
> > CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> > GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > user pgtable: 4k pages, 39-bit VAs, pgdp=00000001185ab000
> > [0000000000000158] pgd=0000000000000000, p4d=0000000000000000,
> > pud=0000000000000000
> > Internal error: Oops: 0000000096000005 [#1] SMP
> > Modules linked in: mtk_vcodec_dec_hw mtk_vcodec_dec mtk_vcodec_enc
> > v4l2_vp9 v4l2_h264 mtk_vcodec_dbgfs mtk_jpeg mtk_vcodec_common
> > cros_ec_sensors mtk_jpeg_enc_hw cros_ec_sensors_core mtk_jpeg_dec_hw
> > mtk_vpu v4l2_mem2mem videobuf2_v4l2 snd_sof_mt8186
> > videobuf2_dma_contig snd_sof_xtensa_dsp sha1_ce videobuf2_memops
> > mtk_adsp_common mtk_scp videobuf2_common snd_sof_of mtk_rpmsg snd_sof
> > rpmsg_core cros_ec_sensorhub hid_google_hammer hid_vivaldi_common
> > snd_sof_utils mtk_scp_ipi fuse
> > CPU: 6 UID: 0 PID: 12 Comm: kworker/u32:0 Tainted: G B
> > 6.15.0-rc1-00001-gfed05d98b726 #628 PREEMPT
> > 06e695da6360d22824958738f9ba1c9b2416be19
> > Tainted: [B]=BAD_PAGE
> > Hardware name: Google Ciri sku2 board (DT)
> > Workqueue: events_unbound deferred_probe_work_func
> > pstate: 40400009 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > pc : iommu_get_dma_domain+0x30/0x58
> > lr : iommu_get_dma_domain+0x30/0x58
> > sp : ffffffc080186ee0
> > x29: ffffffc080186ee0 x28: ffffff80c4c59028 x27: 0000000000000000
> > x26: 0000000000000014 x25: 0068000000000f0b x24: ffffff80c4c5ca28
> > x23: 00000000008ca000 x22: 1ffffff810030df8 x21: ffffff80c20ea010
> > x20: ffffff80c20ea010 x19: 0000000000000000 x18: 0000000000000000
> > x17: 3d3d3d3d3d3d3d3d x16: 3d3d3d3d3d3d3d3d x15: 0720072007200720
> > x14: 0720072007200720 x13: ffffff80c0a1bb80 x12: ffffffbae2ca45e9
> > x11: 1ffffffae2ca45e8 x10: ffffffbae2ca45e8 x9 : dfffffc000000000
> > x8 : 000000451d35ba18 x7 : ffffffd716522f47 x6 : 0000000000000001
> > x5 : ffffffd716522f40 x4 : ffffffbae2ca45e9 x3 : ffffffd713717e6c
> > x2 : 0000000000000001 x1 : ffffff80c0941dc0 x0 : 0000000000000001
> > Call trace:
> > iommu_get_dma_domain+0x30/0x58 (P)
> > __iommu_dma_alloc_noncontiguous+0x34/0x498
> > iommu_dma_alloc+0x2f0/0x3e0
> > dma_alloc_attrs+0x1b4/0x3b8
> > mtk_gem_create+0x124/0x170
> > mtk_gem_dumb_create+0x84/0x180
> > drm_mode_create_dumb+0xf8/0x128
> > drm_client_framebuffer_create+0x11c/0x240
> > drm_fbdev_dma_driver_fbdev_probe+0x154/0x5a0
> > __drm_fb_helper_initial_config_and_unlock+0x4f0/0x928
> > drm_fb_helper_initial_config+0x50/0x68
> > drm_fbdev_client_hotplug+0xc0/0x120
> > drm_client_register+0xa0/0x100
> > drm_fbdev_client_setup+0xc8/0x260
> > drm_client_setup+0x60/0xd0
> > mtk_drm_bind+0x4f0/0xaf0
> > try_to_bring_up_aggregate_device+0x258/0x2f0
> > __component_add+0x104/0x240
> > component_add+0x1c/0x38
> > mtk_disp_rdma_probe+0x180/0x260
> > platform_probe+0x98/0x128
> > really_probe+0x118/0x3c0
> > __driver_probe_device+0xc0/0x198
> > driver_probe_device+0x64/0x1f8
> > __device_attach_driver+0xf0/0x1b0
> > bus_for_each_drv+0xf4/0x178
> > __device_attach+0x120/0x240
> > device_initial_probe+0x1c/0x30
> > bus_probe_device+0xdc/0xe8
> > deferred_probe_work_func+0xec/0x140
> > process_one_work+0x428/0xa80
> > worker_thread+0x2c0/0x538
> > kthread+0x258/0x380
> > ret_from_fork+0x10/0x20
> > Code: 97e8f12c f9422673 91056260 97e8f129 (f940ae60)
> > ---[ end trace 0000000000000000 ]---
> > Kernel panic - not syncing: Oops: Fatal exception
> > SMP: stopping secondary CPUs
> > Kernel Offset: 0x1693600000 from 0xffffffc080000000
> > PHYS_OFFSET: 0x40000000
> > CPU features: 0x0e00,000002e0,01202650,8200720b
> > Memory Limit: none
> > Rebooting in 30 seconds..
> >
> >
> >
> > Thanks
> > ChenYu
> >
> >> Mem abort info:
> >> ESR = 0x0000000096000004
> >> EC = 0x25: DABT (current EL), IL = 32 bits
> >> SET = 0, FnV = 0
> >> EA = 0, S1PTW = 0
> >> FSC = 0x04: level 0 translation fault
> >> Data abort info:
> >> ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
> >> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> >> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> >> user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101380000
> >> [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
> >> Internal error: Oops: 0000000096000004 [#1] SMP
> >> Modules linked in: btusb btrtl mt6315_regulator btintel mtk_vcodec_dec
> >> btmtk v4l2_vp9 btbcm mtk_vcodec_enc v4l2_h264 mtk_vcodec_dbgfs
> >> bluetooth mtk_jpeg ecdh_generic mtk_vcodec_common mtk_jpeg_enc_hw
> >> mtk_jpeg_dec_hw ecc v4l2_mem2mem rfkill videobuf2_dma_contig
> >> videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc
> >> mt6360_charger mcp251xfd it5205 goodix_ts can_dev panfrost
> >> drm_shmem_helper gpu_sched lvts_thermal mtk_svs mtk_adsp_mailbox
> >> snd_soc_dmic mtk_scp mtk_rpmsg mtk_scp_ipi mtk_cmdq_mailbox
> >> mediatek_cpufreq_hw spmi_mtk_pmif fuse dm_mod ip_tables x_tables
> >> ipv6 mediatek_drm tcpci_mt6360 drm_display_helper drm_client_lib
> >> tcpci drm_dma_helper drm_kms_helper tcpm drm mtk_mutex mtk_mmsys
> >> typec rtc_mt6397 mtk_cmdq_helper phy_mtk_pcie pcie_mediatek_gen3
> >> dwmac_mediatek stmmac_platform stmmac pwm_mtk_disp pcs_xpcs pwm_bl
> >> backlight
> >> CPU: 5 UID: 0 PID: 12 Comm: kworker/u32:0 Not tainted 6.14.0-next-20250326 #4 PREEMPT
> >> Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT)
> >> Workqueue: events_unbound deferred_probe_work_func
> >> pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> >> pc : mtk_iommu_device_group+0x2c/0xe0
> >> lr : __iommu_probe_device+0x130/0x490
> >> sp : ffff8000827b3970
> >> x29: ffff8000827b3970 x28: ffff0000c0028000 x27: ffff80008164f2b8
> >> x26: ffff0000cbc47130 x25: ffff80008096a9f0 x24: ffff80008164f5a8
> >> x23: ffff800082684b60 x22: ffff8000827b3a80 x21: ffff0000cbc47130
> >> x20: ffffffffffffffed x19: ffff0000c117c010 x18: 0000000000000000
> >> x17: 6f702d616d642d64 x16: 6574636972747365 x15: 0000000000000002
> >> x14: 0000000000000000 x13: 0000000000128d55 x12: 6f632d616d642e30
> >> x11: 0000000000000100 x10: 0000000000000001 x9 : 0000000000000220
> >> x8 : 0101010101010101 x7 : ffff0000c117c010 x6 : 306c766f2c727461
> >> x5 : 0000000000000000 x4 : ffff0000c10a2de8 x3 : ffff0000c10a2e70
> >> x2 : ffff0000c01a4600 x1 : 0000000000000000 x0 : ffff0000c65470c0
> >> Call trace:
> >> mtk_iommu_device_group+0x2c/0xe0 (P)
> >> __iommu_probe_device+0x130/0x490
> >> probe_iommu_group+0x3c/0x70
> >> bus_for_each_dev+0x7c/0xe0
> >> iommu_device_register+0xd8/0x22c
> >> mtk_iommu_probe+0x270/0x53c
> >> platform_probe+0x68/0xd8
> >> really_probe+0xbc/0x2c0
> >> __driver_probe_device+0x78/0x120
> >> driver_probe_device+0x3c/0x154
> >> __device_attach_driver+0xb8/0x140
> >> bus_for_each_drv+0x88/0xe8
> >> __device_attach+0xa0/0x190
> >> device_initial_probe+0x14/0x20
> >> bus_probe_device+0xb4/0xc0
> >> deferred_probe_work_func+0x90/0xc8
> >> process_one_work+0x148/0x284
> >> worker_thread+0x2cc/0x3cc
> >> kthread+0x12c/0x204
> >> ret_from_fork+0x10/0x20
> >> Code: b4000500 f9401c01 92800254 f9409821 (f9400035)
> >> ---[ end trace 0000000000000000 ]---
> >> ```
> >>
> >> I've tested this patch on Mediatek Genio 510-EVK and 1200-EVK boards
> >> with a kernel based on linux-next (tag: next-20250327).
> >> ---
> >> Changes in v2:
> >> - Fix goto label usage in device registration error case
> >> - Add review and test trailers
> >> - Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com
> >> ---
> >> drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
> >> 1 file changed, 13 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> >> index 034b0e670384a24df10130cbbff95ce8e0bc092d..df98d0c65f5469c6803cd9d151c85ad855558cf5 100644
> >> --- a/drivers/iommu/mtk_iommu.c
> >> +++ b/drivers/iommu/mtk_iommu.c
> >> @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> >> platform_set_drvdata(pdev, data);
> >> mutex_init(&data->mutex);
> >>
> >> - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> >> - "mtk-iommu.%pa", &ioaddr);
> >> - if (ret)
> >> - goto out_link_remove;
> >> -
> >> - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> >> - if (ret)
> >> - goto out_sysfs_remove;
> >> -
> >> if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
> >> list_add_tail(&data->list, data->plat_data->hw_list);
> >> data->hw_list = data->plat_data->hw_list;
> >> @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> >> data->hw_list = &data->hw_list_head;
> >> }
> >>
> >> + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> >> + "mtk-iommu.%pa", &ioaddr);
> >> + if (ret)
> >> + goto out_list_del;
> >> +
> >> + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> >> + if (ret)
> >> + goto out_sysfs_remove;
> >> +
> >> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
> >> ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
> >> if (ret)
> >> - goto out_list_del;
> >> + goto out_device_unregister;
> >> }
> >> return ret;
> >>
> >> -out_list_del:
> >> - list_del(&data->list);
> >> +out_device_unregister:
> >> iommu_device_unregister(&data->iommu);
> >> out_sysfs_remove:
> >> iommu_device_sysfs_remove(&data->iommu);
> >> -out_link_remove:
> >> +out_list_del:
> >> + list_del(&data->list);
> >> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
> >> device_link_remove(data->smicomm_dev, dev);
> >> out_runtime_disable:
> >>
> >> ---
> >> base-commit: bc960e3e32c8b940c10b14557271355f66ae4db1
> >> change-id: 20250327-fix-mtk-iommu-error-af6ec347d057
> >>
> >> Best regards,
> >> --
> >> Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> >>
>
On 08/04/2025 5:46 am, Chen-Yu Tsai wrote:
> On Mon, Apr 7, 2025 at 8:38 PM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> On 2025-04-07 6:17 am, Chen-Yu Tsai wrote:
>>> Hi,
>>>
>>> On Thu, Apr 3, 2025 at 6:24 PM Louis-Alexis Eyraud
>>> <louisalexis.eyraud@collabora.com> wrote:
>>>>
>>>> Currently, mtk_iommu calls during probe iommu_device_register before
>>>> the hw_list from driver data is initialized. Since iommu probing issue
>>>> fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
>>>> hw_list is accessed with list_first_entry (not null safe).
>>>>
>>>> So, change the call order to ensure iommu_device_register is called
>>>> after the driver data are initialized.
>>>>
>>>> Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
>>>> Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
>>>> Reviewed-by: Yong Wu <yong.wu@mediatek.com>
>>>> Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
>>>> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>>>> Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>>>> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
>>>> ---
>>>> This patch fixes a NULL pointer dereference that occurs during the
>>>> mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
>>>> ```
>>>> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
>>>
>>> This is a reminder to please land this and send to Linus ASAP.
>>>
>>> This fixes the v6.15-rc1 kernel on all the MediaTek Chromebook platforms,
>>> except for MT8188, which seems to have another issue in iommu_get_dma_domain()
>>> used from the DRM driver:
>>>
>>> Disabling lock debugging due to kernel taint
>>> Unable to handle kernel NULL pointer dereference at virtual
>>> address 0000000000000158
>>
>> From the offset and the stacktrace code dump, this would appear to be
>> the dereference of dev->iommu_group->default_domain, when
>> dev->iommu_group is NULL (and CONFIG_DEBUG_LOCK_ALLOC makes the mutex
>> really big). Which is a bit weird, as to get into iommu-dma at all in
>> that state would suggest that whatever device this is has been removed
>> and had its group torn down again after iommu_setup_dma_ops() has run...
>> but either way that implies the DRM driver is passing an arbitrary
>> device to the DMA API without making sure it's actualy valid.
>>
>> Trying to trace the provenance of dma_dev from mtk_gem_create() back
>> through the rest of the driver is quite the rabbit-hole, but it seems
>> like in at least one case it can lead back to an
>> of_find_device_by_node() in ovl_adaptor_comp_init(), which definitely
>> looks sufficiently sketchy.
>
> It kind of makes sense since the "display controller" is composed of
> many individual hardware blocks. The struct device tied to the DRM
> driver is more or less just a place holder. Only the first block,
> either the OVL (overlay compositing engine) or RDMA (scanout engine)
> accesses memory, so I think it makes sense to use that as the dma_dev.
I'm not disputing whether the choice of dma_dev is semantically
appropriate, I'm just saying that the method of pulling a struct device
reference out of the DT topology shows no *obvious* guarantee that that
specific device will have a driver bound and be validly configured
before that dma_dev can be used via any other path. Especially when it's
all happening off the back of another fake device created by the fake
DRM device itself.
Maybe there's some hidden magic in all the component stuff which makes
it work out fine, I don't know. This was just an observation since I
went looking for potential bugs and found something which at first
glance *looks* rather fragile, compared to, say, if mtk_mdp_rdma's own
probe() or bind() were to directly set itself as the DMA device.
> With some more logs, I did find something else fishy. Here the IOMMU
> for the second display pipeline fails to probe:
>
> mtk-iommu 1c028000.iommu: error -EINVAL: Failed to register IOMMU
> mtk-iommu 1c028000.iommu: probe with driver mtk-iommu failed with error -22
>
> Then later on, deferred probe times out, and the display pipeline is
> brought up regardless:
>
> mediatek-disp-ovl 1c000000.ovl: deferred probe timeout, ignoring dependency
> mediatek-disp-ovl 1c000000.ovl: Adding to IOMMU failed: -110
> mediatek-disp-rdma 1c002000.rdma: deferred probe timeout, ignoring
> dependency
> mediatek-disp-rdma 1c002000.rdma: Adding to IOMMU failed: -110
> (repeats for all the individual components of the display pipeline)
> mediatek-drm mediatek-drm.16.auto: bound 1c000000.ovl (ops
> mtk_disp_ovl_component_ops)
> mediatek-drm mediatek-drm.16.auto: bound 1c002000.rdma (ops
> mtk_disp_rdma_component_ops)
> (repeats for all the individual components of the display pipeline)
> mediatek-drm mediatek-drm.16.auto: DMA device is 1c000000.ovl
> [drm] Initialized mediatek 1.0.0 for mediatek-drm.16.auto on minor 1
>
> And all without a functional IOMMU.
Ah, that I was not expecting, and it does indeed explain how things get
into that state.
> So I think this brings up two more questions:
>
> 1. Why is the IOMMU failing to probe?
> 2. Why is the core code still going the IOMMU DMA alloc path if there
> is no usable IOMMU?
>
> I'll look into the first question first. Insights welcome for the second
> one.
I had a think about it, and although there's very much a historical mess
in this area, I'm inclined to agree that we could do better. Patch sent.
Cheers,
Robin.
On Tue, Apr 8, 2025 at 12:46 PM Chen-Yu Tsai <wenst@chromium.org> wrote:
>
> On Mon, Apr 7, 2025 at 8:38 PM Robin Murphy <robin.murphy@arm.com> wrote:
> >
> > On 2025-04-07 6:17 am, Chen-Yu Tsai wrote:
> > > Hi,
> > >
> > > On Thu, Apr 3, 2025 at 6:24 PM Louis-Alexis Eyraud
> > > <louisalexis.eyraud@collabora.com> wrote:
> > >>
> > >> Currently, mtk_iommu calls during probe iommu_device_register before
> > >> the hw_list from driver data is initialized. Since iommu probing issue
> > >> fix, it leads to NULL pointer dereference in mtk_iommu_device_group when
> > >> hw_list is accessed with list_first_entry (not null safe).
> > >>
> > >> So, change the call order to ensure iommu_device_register is called
> > >> after the driver data are initialized.
> > >>
> > >> Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
> > >> Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
> > >> Reviewed-by: Yong Wu <yong.wu@mediatek.com>
> > >> Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
> > >> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> > >> Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> > >> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> > >> ---
> > >> This patch fixes a NULL pointer dereference that occurs during the
> > >> mtk_iommu driver probe and observed at least on several Mediatek Genio boards:
> > >> ```
> > >> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
> > >
> > > This is a reminder to please land this and send to Linus ASAP.
> > >
> > > This fixes the v6.15-rc1 kernel on all the MediaTek Chromebook platforms,
> > > except for MT8188, which seems to have another issue in iommu_get_dma_domain()
> > > used from the DRM driver:
> > >
> > > Disabling lock debugging due to kernel taint
> > > Unable to handle kernel NULL pointer dereference at virtual
> > > address 0000000000000158
> >
> > From the offset and the stacktrace code dump, this would appear to be
> > the dereference of dev->iommu_group->default_domain, when
> > dev->iommu_group is NULL (and CONFIG_DEBUG_LOCK_ALLOC makes the mutex
> > really big). Which is a bit weird, as to get into iommu-dma at all in
> > that state would suggest that whatever device this is has been removed
> > and had its group torn down again after iommu_setup_dma_ops() has run...
> > but either way that implies the DRM driver is passing an arbitrary
> > device to the DMA API without making sure it's actualy valid.
> >
> > Trying to trace the provenance of dma_dev from mtk_gem_create() back
> > through the rest of the driver is quite the rabbit-hole, but it seems
> > like in at least one case it can lead back to an
> > of_find_device_by_node() in ovl_adaptor_comp_init(), which definitely
> > looks sufficiently sketchy.
>
> It kind of makes sense since the "display controller" is composed of
> many individual hardware blocks. The struct device tied to the DRM
> driver is more or less just a place holder. Only the first block,
> either the OVL (overlay compositing engine) or RDMA (scanout engine)
> accesses memory, so I think it makes sense to use that as the dma_dev.
>
> With some more logs, I did find something else fishy. Here the IOMMU
> for the second display pipeline fails to probe:
>
> mtk-iommu 1c028000.iommu: error -EINVAL: Failed to register IOMMU
> mtk-iommu 1c028000.iommu: probe with driver mtk-iommu failed with error -22
This looks like it's caused by an incorrect IOMMU description for this
device. It's referencing the wrong IOMMU device for the port it is tied
to.
I'll send a patch for this.
> Then later on, deferred probe times out, and the display pipeline is
> brought up regardless:
>
> mediatek-disp-ovl 1c000000.ovl: deferred probe timeout, ignoring dependency
> mediatek-disp-ovl 1c000000.ovl: Adding to IOMMU failed: -110
> mediatek-disp-rdma 1c002000.rdma: deferred probe timeout, ignoring
> dependency
> mediatek-disp-rdma 1c002000.rdma: Adding to IOMMU failed: -110
> (repeats for all the individual components of the display pipeline)
> mediatek-drm mediatek-drm.16.auto: bound 1c000000.ovl (ops
> mtk_disp_ovl_component_ops)
> mediatek-drm mediatek-drm.16.auto: bound 1c002000.rdma (ops
> mtk_disp_rdma_component_ops)
> (repeats for all the individual components of the display pipeline)
> mediatek-drm mediatek-drm.16.auto: DMA device is 1c000000.ovl
> [drm] Initialized mediatek 1.0.0 for mediatek-drm.16.auto on minor 1
>
> And all without a functional IOMMU.
>
> So I think this brings up two more questions:
>
> 1. Why is the IOMMU failing to probe?
> 2. Why is the core code still going the IOMMU DMA alloc path if there
> is no usable IOMMU?
>
> I'll look into the first question first. Insights welcome for the second
> one.
>
>
> Thanks
> ChenYu
>
>
> > Thanks,
> > Robin.
> >
> > > Mem abort info:
> > > ESR = 0x0000000096000005
> > > EC = 0x25: DABT (current EL), IL = 32 bits
> > > SET = 0, FnV = 0
> > > EA = 0, S1PTW = 0
> > > FSC = 0x05: level 1 translation fault
> > > Data abort info:
> > > ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
> > > CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> > > GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > > user pgtable: 4k pages, 39-bit VAs, pgdp=00000001185ab000
> > > [0000000000000158] pgd=0000000000000000, p4d=0000000000000000,
> > > pud=0000000000000000
> > > Internal error: Oops: 0000000096000005 [#1] SMP
> > > Modules linked in: mtk_vcodec_dec_hw mtk_vcodec_dec mtk_vcodec_enc
> > > v4l2_vp9 v4l2_h264 mtk_vcodec_dbgfs mtk_jpeg mtk_vcodec_common
> > > cros_ec_sensors mtk_jpeg_enc_hw cros_ec_sensors_core mtk_jpeg_dec_hw
> > > mtk_vpu v4l2_mem2mem videobuf2_v4l2 snd_sof_mt8186
> > > videobuf2_dma_contig snd_sof_xtensa_dsp sha1_ce videobuf2_memops
> > > mtk_adsp_common mtk_scp videobuf2_common snd_sof_of mtk_rpmsg snd_sof
> > > rpmsg_core cros_ec_sensorhub hid_google_hammer hid_vivaldi_common
> > > snd_sof_utils mtk_scp_ipi fuse
> > > CPU: 6 UID: 0 PID: 12 Comm: kworker/u32:0 Tainted: G B
> > > 6.15.0-rc1-00001-gfed05d98b726 #628 PREEMPT
> > > 06e695da6360d22824958738f9ba1c9b2416be19
> > > Tainted: [B]=BAD_PAGE
> > > Hardware name: Google Ciri sku2 board (DT)
> > > Workqueue: events_unbound deferred_probe_work_func
> > > pstate: 40400009 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > > pc : iommu_get_dma_domain+0x30/0x58
> > > lr : iommu_get_dma_domain+0x30/0x58
> > > sp : ffffffc080186ee0
> > > x29: ffffffc080186ee0 x28: ffffff80c4c59028 x27: 0000000000000000
> > > x26: 0000000000000014 x25: 0068000000000f0b x24: ffffff80c4c5ca28
> > > x23: 00000000008ca000 x22: 1ffffff810030df8 x21: ffffff80c20ea010
> > > x20: ffffff80c20ea010 x19: 0000000000000000 x18: 0000000000000000
> > > x17: 3d3d3d3d3d3d3d3d x16: 3d3d3d3d3d3d3d3d x15: 0720072007200720
> > > x14: 0720072007200720 x13: ffffff80c0a1bb80 x12: ffffffbae2ca45e9
> > > x11: 1ffffffae2ca45e8 x10: ffffffbae2ca45e8 x9 : dfffffc000000000
> > > x8 : 000000451d35ba18 x7 : ffffffd716522f47 x6 : 0000000000000001
> > > x5 : ffffffd716522f40 x4 : ffffffbae2ca45e9 x3 : ffffffd713717e6c
> > > x2 : 0000000000000001 x1 : ffffff80c0941dc0 x0 : 0000000000000001
> > > Call trace:
> > > iommu_get_dma_domain+0x30/0x58 (P)
> > > __iommu_dma_alloc_noncontiguous+0x34/0x498
> > > iommu_dma_alloc+0x2f0/0x3e0
> > > dma_alloc_attrs+0x1b4/0x3b8
> > > mtk_gem_create+0x124/0x170
> > > mtk_gem_dumb_create+0x84/0x180
> > > drm_mode_create_dumb+0xf8/0x128
> > > drm_client_framebuffer_create+0x11c/0x240
> > > drm_fbdev_dma_driver_fbdev_probe+0x154/0x5a0
> > > __drm_fb_helper_initial_config_and_unlock+0x4f0/0x928
> > > drm_fb_helper_initial_config+0x50/0x68
> > > drm_fbdev_client_hotplug+0xc0/0x120
> > > drm_client_register+0xa0/0x100
> > > drm_fbdev_client_setup+0xc8/0x260
> > > drm_client_setup+0x60/0xd0
> > > mtk_drm_bind+0x4f0/0xaf0
> > > try_to_bring_up_aggregate_device+0x258/0x2f0
> > > __component_add+0x104/0x240
> > > component_add+0x1c/0x38
> > > mtk_disp_rdma_probe+0x180/0x260
> > > platform_probe+0x98/0x128
> > > really_probe+0x118/0x3c0
> > > __driver_probe_device+0xc0/0x198
> > > driver_probe_device+0x64/0x1f8
> > > __device_attach_driver+0xf0/0x1b0
> > > bus_for_each_drv+0xf4/0x178
> > > __device_attach+0x120/0x240
> > > device_initial_probe+0x1c/0x30
> > > bus_probe_device+0xdc/0xe8
> > > deferred_probe_work_func+0xec/0x140
> > > process_one_work+0x428/0xa80
> > > worker_thread+0x2c0/0x538
> > > kthread+0x258/0x380
> > > ret_from_fork+0x10/0x20
> > > Code: 97e8f12c f9422673 91056260 97e8f129 (f940ae60)
> > > ---[ end trace 0000000000000000 ]---
> > > Kernel panic - not syncing: Oops: Fatal exception
> > > SMP: stopping secondary CPUs
> > > Kernel Offset: 0x1693600000 from 0xffffffc080000000
> > > PHYS_OFFSET: 0x40000000
> > > CPU features: 0x0e00,000002e0,01202650,8200720b
> > > Memory Limit: none
> > > Rebooting in 30 seconds..
> > >
> > >
> > >
> > > Thanks
> > > ChenYu
> > >
> > >> Mem abort info:
> > >> ESR = 0x0000000096000004
> > >> EC = 0x25: DABT (current EL), IL = 32 bits
> > >> SET = 0, FnV = 0
> > >> EA = 0, S1PTW = 0
> > >> FSC = 0x04: level 0 translation fault
> > >> Data abort info:
> > >> ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
> > >> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> > >> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > >> user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101380000
> > >> [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
> > >> Internal error: Oops: 0000000096000004 [#1] SMP
> > >> Modules linked in: btusb btrtl mt6315_regulator btintel mtk_vcodec_dec
> > >> btmtk v4l2_vp9 btbcm mtk_vcodec_enc v4l2_h264 mtk_vcodec_dbgfs
> > >> bluetooth mtk_jpeg ecdh_generic mtk_vcodec_common mtk_jpeg_enc_hw
> > >> mtk_jpeg_dec_hw ecc v4l2_mem2mem rfkill videobuf2_dma_contig
> > >> videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc
> > >> mt6360_charger mcp251xfd it5205 goodix_ts can_dev panfrost
> > >> drm_shmem_helper gpu_sched lvts_thermal mtk_svs mtk_adsp_mailbox
> > >> snd_soc_dmic mtk_scp mtk_rpmsg mtk_scp_ipi mtk_cmdq_mailbox
> > >> mediatek_cpufreq_hw spmi_mtk_pmif fuse dm_mod ip_tables x_tables
> > >> ipv6 mediatek_drm tcpci_mt6360 drm_display_helper drm_client_lib
> > >> tcpci drm_dma_helper drm_kms_helper tcpm drm mtk_mutex mtk_mmsys
> > >> typec rtc_mt6397 mtk_cmdq_helper phy_mtk_pcie pcie_mediatek_gen3
> > >> dwmac_mediatek stmmac_platform stmmac pwm_mtk_disp pcs_xpcs pwm_bl
> > >> backlight
> > >> CPU: 5 UID: 0 PID: 12 Comm: kworker/u32:0 Not tainted 6.14.0-next-20250326 #4 PREEMPT
> > >> Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT)
> > >> Workqueue: events_unbound deferred_probe_work_func
> > >> pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > >> pc : mtk_iommu_device_group+0x2c/0xe0
> > >> lr : __iommu_probe_device+0x130/0x490
> > >> sp : ffff8000827b3970
> > >> x29: ffff8000827b3970 x28: ffff0000c0028000 x27: ffff80008164f2b8
> > >> x26: ffff0000cbc47130 x25: ffff80008096a9f0 x24: ffff80008164f5a8
> > >> x23: ffff800082684b60 x22: ffff8000827b3a80 x21: ffff0000cbc47130
> > >> x20: ffffffffffffffed x19: ffff0000c117c010 x18: 0000000000000000
> > >> x17: 6f702d616d642d64 x16: 6574636972747365 x15: 0000000000000002
> > >> x14: 0000000000000000 x13: 0000000000128d55 x12: 6f632d616d642e30
> > >> x11: 0000000000000100 x10: 0000000000000001 x9 : 0000000000000220
> > >> x8 : 0101010101010101 x7 : ffff0000c117c010 x6 : 306c766f2c727461
> > >> x5 : 0000000000000000 x4 : ffff0000c10a2de8 x3 : ffff0000c10a2e70
> > >> x2 : ffff0000c01a4600 x1 : 0000000000000000 x0 : ffff0000c65470c0
> > >> Call trace:
> > >> mtk_iommu_device_group+0x2c/0xe0 (P)
> > >> __iommu_probe_device+0x130/0x490
> > >> probe_iommu_group+0x3c/0x70
> > >> bus_for_each_dev+0x7c/0xe0
> > >> iommu_device_register+0xd8/0x22c
> > >> mtk_iommu_probe+0x270/0x53c
> > >> platform_probe+0x68/0xd8
> > >> really_probe+0xbc/0x2c0
> > >> __driver_probe_device+0x78/0x120
> > >> driver_probe_device+0x3c/0x154
> > >> __device_attach_driver+0xb8/0x140
> > >> bus_for_each_drv+0x88/0xe8
> > >> __device_attach+0xa0/0x190
> > >> device_initial_probe+0x14/0x20
> > >> bus_probe_device+0xb4/0xc0
> > >> deferred_probe_work_func+0x90/0xc8
> > >> process_one_work+0x148/0x284
> > >> worker_thread+0x2cc/0x3cc
> > >> kthread+0x12c/0x204
> > >> ret_from_fork+0x10/0x20
> > >> Code: b4000500 f9401c01 92800254 f9409821 (f9400035)
> > >> ---[ end trace 0000000000000000 ]---
> > >> ```
> > >>
> > >> I've tested this patch on Mediatek Genio 510-EVK and 1200-EVK boards
> > >> with a kernel based on linux-next (tag: next-20250327).
> > >> ---
> > >> Changes in v2:
> > >> - Fix goto label usage in device registration error case
> > >> - Add review and test trailers
> > >> - Link to v1: https://lore.kernel.org/r/20250327-fix-mtk-iommu-error-v1-1-df969158e752@collabora.com
> > >> ---
> > >> drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
> > >> 1 file changed, 13 insertions(+), 13 deletions(-)
> > >>
> > >> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > >> index 034b0e670384a24df10130cbbff95ce8e0bc092d..df98d0c65f5469c6803cd9d151c85ad855558cf5 100644
> > >> --- a/drivers/iommu/mtk_iommu.c
> > >> +++ b/drivers/iommu/mtk_iommu.c
> > >> @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> > >> platform_set_drvdata(pdev, data);
> > >> mutex_init(&data->mutex);
> > >>
> > >> - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> > >> - "mtk-iommu.%pa", &ioaddr);
> > >> - if (ret)
> > >> - goto out_link_remove;
> > >> -
> > >> - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> > >> - if (ret)
> > >> - goto out_sysfs_remove;
> > >> -
> > >> if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
> > >> list_add_tail(&data->list, data->plat_data->hw_list);
> > >> data->hw_list = data->plat_data->hw_list;
> > >> @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> > >> data->hw_list = &data->hw_list_head;
> > >> }
> > >>
> > >> + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
> > >> + "mtk-iommu.%pa", &ioaddr);
> > >> + if (ret)
> > >> + goto out_list_del;
> > >> +
> > >> + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
> > >> + if (ret)
> > >> + goto out_sysfs_remove;
> > >> +
> > >> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
> > >> ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
> > >> if (ret)
> > >> - goto out_list_del;
> > >> + goto out_device_unregister;
> > >> }
> > >> return ret;
> > >>
> > >> -out_list_del:
> > >> - list_del(&data->list);
> > >> +out_device_unregister:
> > >> iommu_device_unregister(&data->iommu);
> > >> out_sysfs_remove:
> > >> iommu_device_sysfs_remove(&data->iommu);
> > >> -out_link_remove:
> > >> +out_list_del:
> > >> + list_del(&data->list);
> > >> if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
> > >> device_link_remove(data->smicomm_dev, dev);
> > >> out_runtime_disable:
> > >>
> > >> ---
> > >> base-commit: bc960e3e32c8b940c10b14557271355f66ae4db1
> > >> change-id: 20250327-fix-mtk-iommu-error-af6ec347d057
> > >>
> > >> Best regards,
> > >> --
> > >> Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> > >>
> >
© 2016 - 2026 Red Hat, Inc.