Add a driver for the DesignWare Mobile Storage Host Controller (DWCMSHC)
SDHCI controller found in Black Sesame Technologies C1200 SoCs.
The driver provides specialized clock configuration, tuning, voltage
switching, and power management for the BST DWCMSHC controller. It also
includes support for eMMC boot and memory-mapped I/O for CRM registers.
---
Changes for v2:
1. Dependency Simplification :
- Removed COMMON_CLK dependency from Kconfig (MMC_SDHCI_BST)
- Add ARCH_BST || COMPILE_TEST dependency from Kconfig (MMC_SDHCI_BST)
2. Resource Management Improvements :
- Replaced temporary ioremap with persistent mapping
* Mapped CRM registers once during probe instead of per-access
* Added proper cleanup in remove callback
- Refactored bounce buffer allocation:
* Simplified error handling and memory management
* Removed unnecessary DMA configuration layers
3. Code Cleanup & Optimization :
- Pruned unused headers and legacy vendor debug code
- Removed deprecated sdhci_bst_print_vendor() export
- Converted internal functions to static scope
- Standardized naming conventions:
* Renamed DRIVER_NAME to match kernel standards
* Changed default_max_freq to DEFAULT_MAX_FREQ
- Optimized clock configuration routines
4. Hardware Integration Fixes :
- Fixed register access macros for EMMC_CTRL
* Added proper offset calculation via SDHCI_VENDOR_PTR_R
- Corrected device tree compatibility string to:
"bst,c1200-dwcmshc-sdhci"
5. Error Handling Enhancements :
- Added robust ioremap error checking
- Improved bounce buffer allocation failure handling
- Streamlined probe/remove flow
6. Maintainability :
- Updated MODULE_DESCRIPTION and AUTHOR fields
- Added explanatory comments for hardware limitations
- Removed redundant multi-host setup infrastructure
7. fix build warnings from lkp
| Reported-by: kernel test robot <lkp@intel.com>
| Closes:
https://lore.kernel.org/oe-kbuild-all/202505290615.GZzN5rNL-lkp@intel.com/
Signed-off-by: Albert Yang <yangzh0906@thundersoft.com>
Signed-off-by: Ge Gordon <gordon.ge@bst.ai>
---
drivers/mmc/host/Kconfig | 11 +
drivers/mmc/host/Makefile | 1 +
drivers/mmc/host/sdhci-of-bst-c1200.c | 557 ++++++++++++++++++++++++++
3 files changed, 569 insertions(+)
create mode 100644 drivers/mmc/host/sdhci-of-bst-c1200.c
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index c3f0f41a426d..a93ea150dcbf 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -1126,3 +1126,14 @@ config MMC_LITEX
module will be called litex_mmc.
If unsure, say N.
+
+config MMC_SDHCI_BST
+ tristate "SDHCI OF support for the BST DWC MSHC"
+ depends on ARCH_BST || COMPILE_TEST
+ depends on MMC_SDHCI_PLTFM
+ depends on OF
+ help
+ This selects Synopsys DesignWare Cores Mobile Storage Controller
+ support.
+ If you have a controller with this interface, say Y or M here.
+ If unsure, say N.
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 75bafc7b162b..bb5df05c3174 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_MMC_MXS) += mxs-mmc.o
obj-$(CONFIG_MMC_SDHCI) += sdhci.o
obj-$(CONFIG_MMC_SDHCI_UHS2) += sdhci-uhs2.o
obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o
+obj-$(CONFIG_MMC_SDHCI_BST) += sdhci-of-bst-c1200.o
sdhci-pci-y += sdhci-pci-core.o sdhci-pci-o2micro.o sdhci-pci-arasan.o \
sdhci-pci-dwc-mshc.o sdhci-pci-gli.o
obj-$(CONFIG_MMC_SDHCI_ACPI) += sdhci-acpi.o
diff --git a/drivers/mmc/host/sdhci-of-bst-c1200.c b/drivers/mmc/host/sdhci-of-bst-c1200.c
new file mode 100644
index 000000000000..233ad959e6e5
--- /dev/null
+++ b/drivers/mmc/host/sdhci-of-bst-c1200.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Black Sesame Technologies SDHCI driver
+ *
+ * Copyright (C) 2024 Black Sesame Technologies. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+struct dwcmshc_priv {
+ void __iomem *crm_reg_base;
+ u32 phy_crm_reg_base;
+ u32 phy_crm_reg_size;
+};
+
+#define SDHCI_CLOCK_PLL_EN 0x0008
+#define SDHCI_TUNING_COUNT 0x20
+#define SDHCI_VENDOR_PTR_R 0xE8
+#define MBIU_CTRL 0x510
+#define BURST_INCR16_EN BIT(3)
+#define BURST_INCR8_EN BIT(2)
+#define BURST_INCR4_EN BIT(1)
+#define BURST_EN (BURST_INCR16_EN | BURST_INCR8_EN | BURST_INCR4_EN)
+
+/* Synopsys vendor specific registers */
+#define SDHC_EMMC_CTRL_R_OFFSET 0x2C
+
+#define SDEMMC_CRM_BCLK_DIV_CTRL 0x08
+#define SDEMMC_CRM_RX_CLK_CTRL 0x14
+#define SDEMMC_CRM_TIMER_DIV_CTRL 0x0C
+#define SDEMMC_CRM_VOL_CTRL 0x1C
+#define REG_WR_PROTECT 0x88
+#define REG_WR_PROTECT_KEY 0x1234abcd
+#define DELAY_CHAIN_SEL 0x94
+#define BST_VOL_STABLE_ON BIT(7)
+#define DEFAULT_MAX_FREQ 200000UL
+
+static u32 bst_read_phys_bst(void __iomem *addr)
+{
+ return ioread32(addr);
+}
+
+static void bst_write_phys_bst(void __iomem *addr, u32 value)
+{
+ iowrite32(value, addr);
+}
+
+static unsigned int bst_get_max_clock(struct sdhci_host *host)
+{
+ return host->mmc->f_max;
+}
+
+static unsigned int bst_get_min_clock(struct sdhci_host *host)
+{
+ return host->mmc->f_min;
+}
+
+struct rx_ctrl {
+ struct {
+ u32 rx_revert:1;
+ u32 rx_clk_sel_sec:1;
+ u32 rx_clk_div:4;
+ u32 rx_clk_phase_inner:2;
+ u32 rx_clk_sel_first:1;
+ u32 rx_clk_phase_out:2;
+ u32 rx_clk_en:1;
+ u32 res0:20;
+ } bit;
+ u32 reg;
+};
+
+struct sdmmc_iocfg {
+ struct {
+ u32 res0:16;
+ u32 SC_SDMMC0_PVDD18POCSD0:2;
+ u32 SC_SDMMC0_PVDD18POCSD1:2;
+ u32 SC_SDMMC0_PVDD18POCSD2:2;
+ u32 SC_SDMMC1_PVDD18POCSD0:2;
+ u32 SC_SDMMC1_PVDD18POCSD1:2;
+ u32 SC_SDMMC1_PVDD18POCSD2:2;
+ u32 res1:4;
+ } bit;
+ u32 reg;
+};
+
+static void sdhci_enable_bst_clk(struct sdhci_host *host, unsigned int clk)
+{
+ struct sdhci_pltfm_host *pltfm_host;
+ struct dwcmshc_priv *priv;
+ unsigned int div;
+ u32 val;
+ struct rx_ctrl rx_reg;
+
+ pltfm_host = sdhci_priv(host);
+ priv = sdhci_pltfm_priv(pltfm_host);
+ if (clk == 0) {
+ div = clk;
+ } else if (clk > DEFAULT_MAX_FREQ) {
+ div = clk / 1000;
+ div = DEFAULT_MAX_FREQ / div;
+ } else if (clk < 1500) {
+ div = clk;
+ } else {
+ div = DEFAULT_MAX_FREQ * 100;
+ div = div / clk;
+ div /= 100;
+ }
+
+ clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ clk &= ~SDHCI_CLOCK_CARD_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+ clk &= ~SDHCI_CLOCK_PLL_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_TIMER_DIV_CTRL);
+ val &= ~BIT(8);
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_TIMER_DIV_CTRL, val);
+
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_TIMER_DIV_CTRL);
+ val &= ~0xff;
+ val |= 0x20;
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_TIMER_DIV_CTRL, val);
+
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_TIMER_DIV_CTRL);
+ val |= BIT(8);
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_TIMER_DIV_CTRL, val);
+
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_RX_CLK_CTRL);
+ val &= ~BIT(11);
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_RX_CLK_CTRL, val);
+
+ rx_reg.reg = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_RX_CLK_CTRL);
+
+ rx_reg.bit.rx_revert = 0;
+ rx_reg.bit.rx_clk_sel_sec = 1;
+ rx_reg.bit.rx_clk_div = 4;
+ rx_reg.bit.rx_clk_phase_inner = 2;
+ rx_reg.bit.rx_clk_sel_first = 0;
+ rx_reg.bit.rx_clk_phase_out = 2;
+
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_RX_CLK_CTRL, rx_reg.reg);
+
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_RX_CLK_CTRL);
+ val |= BIT(11);
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_RX_CLK_CTRL, val);
+
+ /* Disable clock first */
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_BCLK_DIV_CTRL);
+ val &= ~BIT(10);
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_BCLK_DIV_CTRL, val);
+
+ /* Setup clock divider */
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_BCLK_DIV_CTRL);
+ val &= ~GENMASK(9, 0);
+ val |= div;
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_BCLK_DIV_CTRL, val);
+
+ /* Enable clock */
+ val = bst_read_phys_bst(priv->crm_reg_base + SDEMMC_CRM_BCLK_DIV_CTRL);
+ val |= BIT(10);
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_BCLK_DIV_CTRL, val);
+
+ sdhci_writew(host, (div & 0xff) << 8, SDHCI_CLOCK_CONTROL);
+
+ sdhci_writew(host, (div & 0xff) << 8, SDHCI_CLOCK_CONTROL);
+ clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ clk |= SDHCI_CLOCK_PLL_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+ clk |= SDHCI_CLOCK_CARD_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+ clk |= SDHCI_CLOCK_INT_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+}
+
+static void sdhci_set_bst_clock(struct sdhci_host *host, unsigned int clock)
+{
+ if (clock == 0)
+ return;
+ sdhci_enable_bst_clk(host, clock);
+}
+
+/**
+ * sdhci_bst_reset - Reset the SDHCI host controller
+ * @host: SDHCI host controller
+ * @mask: Reset mask
+ *
+ * Performs a reset of the SDHCI host controller with special handling for eMMC.
+ */
+static void sdhci_bst_reset(struct sdhci_host *host, u8 mask)
+{
+ u16 vendor_ptr, emmc_ctrl_reg;
+
+ if (host->mmc->caps2 & MMC_CAP2_NO_SD) {
+ vendor_ptr = sdhci_readw(host, SDHCI_VENDOR_PTR_R);
+ emmc_ctrl_reg = vendor_ptr + SDHC_EMMC_CTRL_R_OFFSET;
+
+ sdhci_writew(host,
+ sdhci_readw(host, emmc_ctrl_reg) & (~BIT(2)),
+ emmc_ctrl_reg);
+ sdhci_reset(host, mask);
+ usleep_range(10, 20);
+ sdhci_writew(host,
+ sdhci_readw(host, emmc_ctrl_reg) | BIT(2),
+ emmc_ctrl_reg);
+ } else {
+ sdhci_reset(host, mask);
+ }
+}
+
+/**
+ * sdhci_bst_timeout - Set timeout value for commands
+ * @host: SDHCI host controller
+ * @cmd: MMC command
+ *
+ * Sets the timeout control register to maximum value (0xE).
+ */
+static void sdhci_bst_timeout(struct sdhci_host *host, struct mmc_command *cmd)
+{
+ sdhci_writeb(host, 0xE, SDHCI_TIMEOUT_CONTROL);
+}
+
+/**
+ * sdhci_bst_set_power - Set power mode and voltage
+ * @host: SDHCI host controller
+ * @mode: Power mode to set
+ * @vdd: Voltage to set
+ *
+ * Sets power mode and voltage, also configures MBIU control register.
+ */
+static void sdhci_bst_set_power(struct sdhci_host *host, unsigned char mode,
+ unsigned short vdd)
+{
+ sdhci_set_power(host, mode, vdd);
+ sdhci_writeb(host, 0xF, SDHCI_POWER_CONTROL);
+ sdhci_writew(host,
+ (sdhci_readw(host, MBIU_CTRL) & (~0xf)) | BURST_EN,
+ MBIU_CTRL);
+}
+
+/**
+ * bst_sdhci_execute_tuning - Execute tuning procedure
+ * @host: SDHCI host controller
+ * @opcode: Opcode to use for tuning
+ *
+ * Performs tuning procedure by trying different values and selecting the best one.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int bst_sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
+{
+ struct sdhci_pltfm_host *pltfm_host;
+ struct dwcmshc_priv *priv;
+ unsigned int clk = 0, timeout;
+ int ret = 0, error;
+ int start0 = -1, end0 = -1, best = 0;
+ int start1 = -1, end1 = -1, flag = 0;
+ int i;
+
+ pltfm_host = sdhci_priv(host);
+ priv = sdhci_pltfm_priv(pltfm_host);
+
+ for (i = 0; i < SDHCI_TUNING_COUNT; i++) {
+ /* Protected write */
+ bst_write_phys_bst(priv->crm_reg_base + REG_WR_PROTECT, REG_WR_PROTECT_KEY);
+ /* Write tuning value */
+ bst_write_phys_bst(priv->crm_reg_base + DELAY_CHAIN_SEL,
+ (1ul << i) - 1);
+
+ timeout = 20;
+ while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) &
+ SDHCI_CLOCK_INT_STABLE)) {
+ if (timeout == 0) {
+ dev_err(mmc_dev(host->mmc), "Internal clock never stabilised\n");
+ return -EBUSY;
+ }
+ timeout--;
+ usleep_range(1000, 1100);
+ }
+
+ ret = mmc_send_tuning(host->mmc, opcode, &error);
+ if (ret != 0) {
+ flag = 1;
+ } else {
+ if (flag == 0) {
+ if (start0 == -1)
+ start0 = i;
+ end0 = i;
+ } else {
+ if (start1 == -1)
+ start1 = i;
+ end1 = i;
+ }
+ }
+ }
+
+ /* Calculate best tuning value */
+ if (end0 - start0 >= end1 - start1)
+ best = ((end0 - start0) >> 1) + start0;
+ else
+ best = ((end1 - start1) >> 1) + start1;
+
+ if (best < 0)
+ best = 0;
+
+ bst_write_phys_bst(priv->crm_reg_base + DELAY_CHAIN_SEL, (1ul << best) - 1);
+ timeout = 20;
+
+ while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) &
+ SDHCI_CLOCK_INT_STABLE)) {
+ if (timeout == 0) {
+ dev_err(mmc_dev(host->mmc), "Internal clock never stabilised\n");
+ return -EBUSY;
+ }
+ timeout--;
+ usleep_range(1000, 1100);
+ }
+
+ return 0;
+}
+
+/**
+ * sdhci_bst_voltage_switch - Perform voltage switch
+ * @host: SDHCI host controller
+ *
+ * Enables voltage stable power.
+ */
+static void sdhci_bst_voltage_switch(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
+
+ /* vol stable power on */
+ bst_write_phys_bst(priv->crm_reg_base + SDEMMC_CRM_VOL_CTRL,
+ BST_VOL_STABLE_ON);
+}
+
+static const struct sdhci_ops sdhci_dwcmshc_ops = {
+ .set_clock = sdhci_set_bst_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .get_min_clock = bst_get_min_clock,
+ .get_max_clock = bst_get_max_clock,
+ .reset = sdhci_bst_reset,
+ .set_power = sdhci_bst_set_power,
+ .set_timeout = sdhci_bst_timeout,
+ .platform_execute_tuning = bst_sdhci_execute_tuning,
+ .voltage_switch = sdhci_bst_voltage_switch,
+};
+
+static const struct sdhci_pltfm_data sdhci_dwcmshc_pdata = {
+ .ops = &sdhci_dwcmshc_ops,
+ .quirks = SDHCI_QUIRK_DELAY_AFTER_POWER |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_INVERTED_WRITE_PROTECT,
+ .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50 |
+ SDHCI_QUIRK2_TUNING_WORK_AROUND |
+ SDHCI_QUIRK2_ACMD23_BROKEN,
+};
+
+static int bst_sdhci_reallocate_bounce_buffer(struct sdhci_host *host)
+{
+ struct mmc_host *mmc = host->mmc;
+ unsigned int max_blocks;
+ unsigned int bounce_size;
+ int ret;
+
+ /*
+ * Cap the bounce buffer at 64KB. Using a bigger bounce buffer
+ * has diminishing returns, this is probably because SD/MMC
+ * cards are usually optimized to handle this size of requests.
+ */
+ bounce_size = SZ_32K;
+ /*
+ * Adjust downwards to maximum request size if this is less
+ * than our segment size, else hammer down the maximum
+ * request size to the maximum buffer size.
+ */
+ if (mmc->max_req_size < bounce_size)
+ bounce_size = mmc->max_req_size;
+ max_blocks = bounce_size / 512;
+
+ ret = of_reserved_mem_device_init_by_idx(mmc_dev(mmc), mmc_dev(mmc)->of_node, 0);
+ if (ret) {
+ dev_err(mmc_dev(mmc), "Failed to initialize reserved memory\n");
+ return ret;
+ }
+
+ host->bounce_buffer = dma_alloc_coherent(mmc_dev(mmc), bounce_size,
+ &host->bounce_addr, GFP_KERNEL);
+ if (!host->bounce_buffer)
+ return -ENOMEM;
+
+ host->bounce_buffer_size = bounce_size;
+
+ /* Lie about this since we're bouncing */
+ mmc->max_segs = max_blocks;
+ mmc->max_seg_size = bounce_size;
+ mmc->max_req_size = bounce_size;
+
+ dev_info(mmc_dev(mmc), "BST reallocate %s bounce up to %u segments into one, max segment size %u bytes\n",
+ mmc_hostname(mmc), max_blocks, bounce_size);
+
+ return 0;
+}
+
+/**
+ * dwcmshc_probe - Platform driver probe
+ * @pdev: Platform device
+ *
+ * Initializes the SDHCI host controller and registers it.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int dwcmshc_probe(struct platform_device *pdev)
+{
+ struct sdhci_pltfm_host *pltfm_host;
+ struct sdhci_host *host;
+ struct dwcmshc_priv *priv;
+ struct resource *crm_res;
+ int err;
+
+ host = sdhci_pltfm_init(pdev, &sdhci_dwcmshc_pdata,
+ sizeof(struct dwcmshc_priv));
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+ pltfm_host = sdhci_priv(host);
+ priv = sdhci_pltfm_priv(pltfm_host);
+
+ err = mmc_of_parse(host->mmc);
+ if (err)
+ goto err;
+
+ sdhci_get_of_property(pdev);
+
+ /* Get CRM registers from the second reg entry */
+ crm_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!crm_res) {
+ dev_err(&pdev->dev, "Failed to get CRM register resource\n");
+ err = -ENODEV;
+ goto err;
+ }
+
+ priv->phy_crm_reg_base = crm_res->start;
+ priv->phy_crm_reg_size = resource_size(crm_res);
+
+ priv->crm_reg_base = ioremap(priv->phy_crm_reg_base, priv->phy_crm_reg_size);
+ if (!priv->crm_reg_base) {
+ dev_err(&pdev->dev, "Failed to ioremap CRM registers\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = sdhci_add_host(host);
+ if (err)
+ goto err_iounmap;
+
+ /*
+ * Hardware limitation workaround:
+ *
+ * Our platform supports 64-bit physical addressing, but the eMMC
+ * controller's SRAM-based DMA engine is constrained to a 32-bit
+ * address space. When using the standard SDHCI interface, which
+ * allocates DDR-based DMA buffers with 64-bit addresses, the
+ * dma_map_single() operation fails because the DMA engine cannot
+ * handle addresses beyond 32 bits.
+ *
+ * To resolve this hardware limitation, we implement a bounce buffer
+ * allocated via dma_alloc_coherent() to satisfy DMA addressing
+ * constraints.
+ */
+ err = bst_sdhci_reallocate_bounce_buffer(host);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate bounce buffer: %d\n", err);
+ goto err_remove_host;
+ }
+
+ return 0;
+
+err_remove_host:
+ sdhci_remove_host(host, 1);
+err_iounmap:
+ if (priv->crm_reg_base)
+ iounmap(priv->crm_reg_base);
+err:
+ sdhci_pltfm_free(pdev);
+ return err;
+}
+
+/**
+ * dwcmshc_remove - Platform driver remove
+ * @pdev: Platform device
+ *
+ * Removes the SDHCI host controller.
+ *
+ * Return: 0 on success
+ */
+static void dwcmshc_remove(struct platform_device *pdev)
+{
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ struct sdhci_pltfm_host *pltfm_host;
+ struct dwcmshc_priv *priv;
+
+ pltfm_host = sdhci_priv(host);
+ priv = sdhci_pltfm_priv(pltfm_host);
+
+ /* Free bounce buffer if allocated */
+ if (host->bounce_buffer) {
+ dma_free_coherent(mmc_dev(host->mmc), host->bounce_buffer_size,
+ host->bounce_buffer, host->bounce_addr);
+ host->bounce_buffer = NULL;
+ }
+
+ /* Release reserved memory */
+ of_reserved_mem_device_release(mmc_dev(host->mmc));
+
+ iounmap(priv->crm_reg_base);
+
+ sdhci_remove_host(host, 0);
+ sdhci_pltfm_free(pdev);
+}
+
+static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
+ { .compatible = "bst,c1200-dwcmshc-sdhci" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, sdhci_dwcmshc_dt_ids);
+
+static struct platform_driver sdhci_dwcmshc_driver = {
+ .driver = {
+ .name = "sdhci-dwcmshc",
+ .of_match_table = sdhci_dwcmshc_dt_ids,
+ },
+ .probe = dwcmshc_probe,
+ .remove = dwcmshc_remove,
+};
+module_platform_driver(sdhci_dwcmshc_driver);
+
+MODULE_DESCRIPTION("Black Sesame Technologies DWCMSHC SDHCI driver");
+MODULE_AUTHOR("Black Sesame Technologies Co., Ltd.");
+MODULE_LICENSE("GPL");
--
2.25.1
On 02/07/2025 11:44, Albert Yang wrote: > Add a driver for the DesignWare Mobile Storage Host Controller (DWCMSHC) > SDHCI controller found in Black Sesame Technologies C1200 SoCs. > > The driver provides specialized clock configuration, tuning, voltage > switching, and power management for the BST DWCMSHC controller. It also > includes support for eMMC boot and memory-mapped I/O for CRM registers. > Missing SoB. ... > + > +static int bst_sdhci_reallocate_bounce_buffer(struct sdhci_host *host) > +{ > + struct mmc_host *mmc = host->mmc; > + unsigned int max_blocks; > + unsigned int bounce_size; > + int ret; > + > + /* > + * Cap the bounce buffer at 64KB. Using a bigger bounce buffer > + * has diminishing returns, this is probably because SD/MMC > + * cards are usually optimized to handle this size of requests. > + */ > + bounce_size = SZ_32K; > + /* > + * Adjust downwards to maximum request size if this is less > + * than our segment size, else hammer down the maximum > + * request size to the maximum buffer size. > + */ > + if (mmc->max_req_size < bounce_size) > + bounce_size = mmc->max_req_size; > + max_blocks = bounce_size / 512; > + > + ret = of_reserved_mem_device_init_by_idx(mmc_dev(mmc), mmc_dev(mmc)->of_node, 0); > + if (ret) { > + dev_err(mmc_dev(mmc), "Failed to initialize reserved memory\n"); > + return ret; > + } > + > + host->bounce_buffer = dma_alloc_coherent(mmc_dev(mmc), bounce_size, > + &host->bounce_addr, GFP_KERNEL); > + if (!host->bounce_buffer) > + return -ENOMEM; > + > + host->bounce_buffer_size = bounce_size; > + > + /* Lie about this since we're bouncing */ > + mmc->max_segs = max_blocks; > + mmc->max_seg_size = bounce_size; > + mmc->max_req_size = bounce_size; > + > + dev_info(mmc_dev(mmc), "BST reallocate %s bounce up to %u segments into one, max segment size %u bytes\n", > + mmc_hostname(mmc), max_blocks, bounce_size); Devices are supposed to be silent on success. > + ... > +/** > + * dwcmshc_remove - Platform driver remove > + * @pdev: Platform device > + * > + * Removes the SDHCI host controller. > + * > + * Return: 0 on success > + */ Drop all such fake comments, not helpful. We all now what is the purpose of the function and saying that platform driver remove callback is "platform driver remove" which "Removes the SDHCI host controller." is not only redundant, but actually harming because later you have: "Return: 0 on success" which is impossible. Such redundant comments are not kernel coding style. Provide USEFUL comments, useful kerneldoc, not something to satisfy line-counters. Best regards, Krzysztof
On Wed, Jul 2, 2025, at 11:44, Albert Yang wrote: > + > +config MMC_SDHCI_BST > + tristate "SDHCI OF support for the BST DWC MSHC" > + depends on ARCH_BST || COMPILE_TEST > + depends on MMC_SDHCI_PLTFM > + depends on OF > + help > + This selects Synopsys DesignWare Cores Mobile Storage Controller > + support. The description does not mention the actual device it's for but only DesignWare. Try to keep this sorted alphabetically between the other CONFIG_MMC_SDHCI_* backends > + > +struct dwcmshc_priv { > + void __iomem *crm_reg_base; > + u32 phy_crm_reg_base; > + u32 phy_crm_reg_size; > +}; You are only using the first member here, the phy_crm_reg_base and phy_crm_reg_size are assigned during probe but not referenced later. devm_platform_ioremap_resource() should help simplify that code further. > + > +static void bst_write_phys_bst(void __iomem *addr, u32 value) > +{ > + iowrite32(value, addr); > +} You always pass priv->crm_reg_base into this helper, so it would be simpler to make it take the sdhci_pltfm_host pointer and the offset instead of the address. > +static int bst_sdhci_reallocate_bounce_buffer(struct sdhci_host *host) > +{ > + struct mmc_host *mmc = host->mmc; > + unsigned int max_blocks; > + unsigned int bounce_size; > + int ret; > + > + /* > + * Cap the bounce buffer at 64KB. Using a bigger bounce buffer > + * has diminishing returns, this is probably because SD/MMC > + * cards are usually optimized to handle this size of requests. > + */ > + bounce_size = SZ_32K; The comment says 64K, but the size you use is 32K. > + /* Get CRM registers from the second reg entry */ > + crm_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); devm_platform_ioremap_resource() > + /* > + * Hardware limitation workaround: > + * > + * Our platform supports 64-bit physical addressing, but the eMMC > + * controller's SRAM-based DMA engine is constrained to a 32-bit > + * address space. When using the standard SDHCI interface, which > + * allocates DDR-based DMA buffers with 64-bit addresses, the > + * dma_map_single() operation fails because the DMA engine cannot > + * handle addresses beyond 32 bits. > + * > + * To resolve this hardware limitation, we implement a bounce buffer > + * allocated via dma_alloc_coherent() to satisfy DMA addressing > + * constraints. > + */ > + err = bst_sdhci_reallocate_bounce_buffer(host); Having an explanation here makes sense, but I don't think this captures what is actually going on, in particular: - dma_alloc_coherent() being backed by an SRAM that is under the 4GB boundary - the problem that the SoC is configured that all of DRAM is outside of ZONE_DMA32 - The type of hardware bug that leads to 64-bit DMA being broken in this SoC. I still have some hope that the hardware is not actually that broken and you can get it working normally, in one of these ways: - enabling 64-bit addressing in the parent bus - enabling SMMU translation for the parent bus - configuring the parent bus or the sdhci itself to access the first 4GB of RAM, and describing the offset in dma-ranges - moving the start of RAM in a global SoC config It is rather unlikely that the SoC designer chose to integrate a 32-bit-only device without adding some way to configure it to access RAM. Arnd
Hi Arnd, Thank you for your detailed review and suggestions. I have addressed all the issues you raised in v2 of the patch series. On Wed, Jul 2, 2025, at 11:44, Arnd Bergmann wrote: > The description does not mention the actual device it's for > but only DesignWare. > > Try to keep this sorted alphabetically between the other > CONFIG_MMC_SDHCI_* backends Fixed. Updated the Kconfig description to mention "Black Sesame Technologies BST C1200 controller" and moved the config entry to the correct alphabetical position between MMC_SDHCI_BCM_KONA and MMC_SDHCI_F_SDH30. > You are only using the first member here, the phy_crm_reg_base > and phy_crm_reg_size are assigned during probe but not referenced > later. devm_platform_ioremap_resource() should help simplify > that code further. Agreed. Removed the unused phy_crm_reg_base and phy_crm_reg_size fields from dwcmshc_priv structure and replaced the manual platform_get_resource() + ioremap() calls with devm_platform_ioremap_resource(). > You always pass priv->crm_reg_base into this helper, so > it would be simpler to make it take the sdhci_pltfm_host > pointer and the offset instead of the address. Good suggestion. Replaced bst_write_phys_bst() and bst_read_phys_bst() with bst_crm_write() and bst_crm_read() that take sdhci_pltfm_host pointer and offset parameters for better encapsulation. > The comment says 64K, but the size you use is 32K. Fixed the comment to correctly state 32KB. > Having an explanation here makes sense, but I don't think this > captures what is actually going on, in particular: > > - dma_alloc_coherent() being backed by an SRAM that is under > the 4GB boundary > - the problem that the SoC is configured that all of DRAM > is outside of ZONE_DMA32 > - The type of hardware bug that leads to 64-bit DMA being > broken in this SoC. You're absolutely right. I've enhanced the comment with a detailed explanation of our specific hardware constraints: 1. System memory uses 64-bit bus, eMMC controller uses 32-bit bus 2. eMMC controller cannot access memory through SMMU due to hardware bug 3. Our SRAM-based bounce buffer solution works within 32-bit address space > I still have some hope that the hardware is not actually > that broken and you can get it working normally, in one > of these ways: > - enabling 64-bit addressing in the parent bus > - enabling SMMU translation for the parent bus > - configuring the parent bus or the sdhci itself to > access the first 4GB of RAM, and describing the > offset in dma-ranges > - moving the start of RAM in a global SoC config I appreciate your optimism about finding alternative solutions. Unfortunately, we have thoroughly investigated these approaches: Regarding these last two suggestions, I'm not very familiar with the implementation details. Could you provide more guidance on: 1. **dma-ranges approach**: We tried adding these properties to the device tree: ``` dma-ranges = <0x00000000 0x00000000 0x100000000>; dma-mask = <0xffffffff>; ``` However, we still encounter DMA addressing issues. Are there specific examples in other drivers we could reference for similar hardware constraints? 2. **Moving RAM start position**: Which component would control this configuration? Would this require bootloader parameter changes or SoC-level configuration? We're certainly interested in exploring these alternatives if they could provide a more elegant solution than our current bounce buffer approach. The v3 patch addresses all your code structure and documentation concerns while providing a clear explanation of why this approach is necessary for our platform. I have also fixed compilation warnings about unused variables that resulted from the refactoring. **Current DMA Issues**: Despite setting DMA32_ZONE, we still encounter DMA addressing warnings. Key error messages include: ``` DMA addr 0x00000008113e2200+512 overflow (mask ffffffff, bus limit 0) software IO TLB: swiotlb_memblock_alloc: Failed to allocate [various sizes] ``` This confirms our hardware limitation where the eMMC controller cannot access memory above 32-bit boundaries, even with ZONE_DMA32 configured. The complete kernel log with detailed DMA allocation failures and warnings is available at: https://pastebin.com/eJgtuHDh Please let me know if you need any additional information or have suggestions for alternative approaches. Best regards, Albert
On Fri, Jul 11, 2025, at 07:55, Albert Yang wrote: > On Wed, Jul 2, 2025, at 11:44, Arnd Bergmann wrote: > Regarding these last two suggestions, I'm not very familiar with the > implementation > details. Could you provide more guidance on: > > 1. **dma-ranges approach**: We tried adding these properties to the > device tree: > ``` > dma-ranges = <0x00000000 0x00000000 0x100000000>; > dma-mask = <0xffffffff>; > ``` > However, we still encounter DMA addressing issues. Are there > specific > examples in other drivers we could reference for similar hardware > constraints? The way that 'dma-ranges' is supposed to work is that it accurately describes in the bus node how a child device accesses the parent view of the physical address space. The 'dma-mask' property above is not a regular property, but the dma-ranges you have looks similar to what you have for a 1:1 mapping of the first 4GB, matching what you describe is the actual hardware mistake, but you are using the wrong address/size cells for this bus: + soc: soc@0 { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0x0 0x0 0x0 0x0 0xffffffff 0xffffffff>; What your dts file describes is that there is a single 64-bit bus named 'soc@0' that contains the entire 64-bit address space. You have given an explicit 'ranges' property that maps everything except for the final byte in the downstream direction, which is equivalent to having an empty 'ranges' property. You have left out the upstream 'dma-ranges' translation, which I think means you just get the default that matches 'ranges'. Presumably neither of them are correct. In most SoCs there is more than one bus, and you have already said above that the actual bus that the sdhci device is on only uses 32-bit addressing, which for a 1:1 translation would look like + soc: soc@0 { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0x0 0x0 0x0 0x0 0x1 0x00000000>; + dma-ranges = <0x0 0x0 0x0 0x0 0x1 0x00000000>; meaning that doing an mmio access on the first 4GB of CPU address space gets routed to this bus at the same address, and that any device trying to do a DMA only works on the first 4GB as well, which then correspond to the bus itself, not to RAM. If that is your setup, that would explain how the SRAM and SDHCI devices are both on the same bus and the SDHCI can do DMA internally on the bus, but cannot actually reach RAM. A more sensible setup would be to have the DMA access routed to the memory controller. Since memory on your system appears to start at 0x8.00000000, that would look like dma-ranges = <0x0 0x0 0x08 0x00000000 0x1 0x00000000>; which means that a DMA to the first 4GB of the bus address space gets routed outside of the bus into the first 4GB of physical memory. Obviously you cannot just change the dts to pretend this is the correct mapping, you have to also program the bus controller to use it. The datasheet for the chip should tell you specifically what type of bus this is (AXI, AHB, OPB or something else), and what registers are used to program this mapping. It is possible that this cannot be reprogrammed, but more likely there is a hidden register that is made available to the boot loader but is not intended to be reconfigured by the OS. > 2. **Moving RAM start position**: Which component would control this > configuration? > Would this require bootloader parameter changes or SoC-level > configuration? This would be in the early stages of the boot loader that set up the memory controller, as it is hard to reconfigure the RAM location after you are already running from DRAM. > The v3 patch addresses all your code structure and documentation concerns > while providing a clear explanation of why this approach is necessary for > our platform. I have also fixed compilation warnings about unused variables > that resulted from the refactoring. > > **Current DMA Issues**: Despite setting DMA32_ZONE, we still encounter DMA > addressing warnings. Key error messages include: > ``` > DMA addr 0x00000008113e2200+512 overflow (mask ffffffff, bus limit 0) > software IO TLB: swiotlb_memblock_alloc: Failed to allocate [various sizes] > ``` > > This confirms our hardware limitation where the eMMC controller cannot access > memory above 32-bit boundaries, even with ZONE_DMA32 configured. All that this confirms is that Linux observes an impossible configuration where RAM starts above the DMA32 zone. Arnd
Hi Arnd, Thank you for your detailed review and technical guidance on the dma-ranges configuration and hardware address mapping solutions. On Fri, Jul 11, 2025, at 08:55, Arnd Bergmann wrote: >A more sensible setup would be to have the DMA access >routed to the memory controller. Since memory on your >system appears to start at 0x8.00000000, that would look >like > > dma-ranges = <0x0 0x0 0x08 0x00000000 0x1 0x00000000>; > >which means that a DMA to the first 4GB of the bus >address space gets routed outside of the bus into the >first 4GB of physical memory. Obviously you cannot just >change the dts to pretend this is the correct mapping, you >have to also program the bus controller to use it. > >The datasheet for the chip should tell you specifically >what type of bus this is (AXI, AHB, OPB or something else), >and what registers are used to program this mapping. It >is possible that this cannot be reprogrammed, but more likely >there is a hidden register that is made available to the boot >loader but is not intended to be reconfigured by the OS. After investigating the approaches you suggested, I need to clarify the hardware constraints we face: The BST C1200 SoC has fundamental hardware limitations: - System RAM starts at 0x8.00000000 (above 32-bit addressable range) - The eMMC controller's DMA engine is limited to 32-bit addressing - The SMMU does not function for address translation in this path due to hardware design flaws - These limitations were finalized during silicon design and cannot be modified in software Regarding your suggestion about programming the bus controller mapping, we have thoroughly reviewed the BST C1200 datasheet and confirmed: - No accessible registers exist to reprogram the address mapping - The 32-bit DMA limitation is a hard constraint of the controller IP Given these silicon-level constraints, the current bounce buffer approach represents the only viable solution to provide eMMC functionality on this platform. We understand this is not an ideal architectural solution and appreciate your patience in reviewing this hardware-constrained implementation. I want to assure you that these design limitations have been addressed in our subsequent chip generations. We would appreciate your consideration of this approach given the documented hardware constraints. Thank you again for your time and expertise in reviewing this patch. Best regards, Albert Yang
On Fri, Aug 8, 2025, at 10:39, Albert Yang wrote: > On Fri, Jul 11, 2025, at 08:55, Arnd Bergmann wrote: > > After investigating the approaches you suggested, I need to clarify the > hardware constraints we face: > > The BST C1200 SoC has fundamental hardware limitations: > - System RAM starts at 0x8.00000000 (above 32-bit addressable range) > - The eMMC controller's DMA engine is limited to 32-bit addressing > - The SMMU does not function for address translation in this path due to > hardware design flaws > - These limitations were finalized during silicon design and cannot be > modified in software > > Regarding your suggestion about programming the bus controller mapping, > we have thoroughly reviewed the BST C1200 datasheet and confirmed: > - No accessible registers exist to reprogram the address mapping > - The 32-bit DMA limitation is a hard constraint of the controller IP > > Given these silicon-level constraints, the current bounce buffer approach > represents the only viable solution to provide eMMC functionality on this > platform. Thanks for checking and confirming. I think what you had in v2 is probably close to what we want then. Please update the comments in the code that explain the workaround and send it as v3 so we can have another look. Arnd
© 2016 - 2025 Red Hat, Inc.