From nobody Wed Apr 8 06:25:26 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A26BEC28D13 for ; Tue, 23 Aug 2022 03:06:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S239892AbiHWDGq (ORCPT ); Mon, 22 Aug 2022 23:06:46 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56394 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S239861AbiHWDGT (ORCPT ); Mon, 22 Aug 2022 23:06:19 -0400 Received: from sin.source.kernel.org (sin.source.kernel.org [IPv6:2604:1380:40e1:4800::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AB7F42CC93; Mon, 22 Aug 2022 20:03:39 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by sin.source.kernel.org (Postfix) with ESMTPS id E8558CE188A; Tue, 23 Aug 2022 03:03:36 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8BE77C433D6; Tue, 23 Aug 2022 03:03:32 +0000 (UTC) From: Huacai Chen To: Arnd Bergmann , Huacai Chen Cc: loongarch@lists.linux.dev, linux-arch@vger.kernel.org, Xuefeng Li , Guo Ren , Xuerui Wang , Jiaxun Yang , linux-kernel@vger.kernel.org, Huacai Chen Subject: [PATCH V2] LoongArch: Use TLB for ioremap() Date: Tue, 23 Aug 2022 11:03:19 +0800 Message-Id: <20220823030319.3872957-1-chenhuacai@loongson.cn> X-Mailer: git-send-email 2.31.1 MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" We can support more cache attributes (e.g., CC, SUC and WUC) and page protection when we use TLB for ioremap(). The implementation is based on GENERIC_IOREMAP. The existing simple ioremap() implementation has better performance so we keep it and introduce ARCH_IOREMAP to control the selection. We move pagetable_init() earlier to make early ioremap() works, and we modify the PCI ecam mapping because the TLB-based version of ioremap() will actually take the size into account. Signed-off-by: Huacai Chen Reported-by: kernel test robot --- V2: Based on GENERIC_IOREMAP. arch/loongarch/Kconfig | 7 +++ arch/loongarch/include/asm/fixmap.h | 15 +++++ arch/loongarch/include/asm/io.h | 69 ++++++-------------- arch/loongarch/include/asm/pgtable-bits.h | 3 + arch/loongarch/kernel/setup.c | 2 +- arch/loongarch/mm/init.c | 64 +++++++++++++++++++ arch/loongarch/pci/acpi.c | 76 +++++++++++++++++++++-- 7 files changed, 180 insertions(+), 56 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 3594f0ba7581..5b89386d6972 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -61,6 +61,7 @@ config LOONGARCH select GENERIC_CPU_AUTOPROBE select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY + select GENERIC_IOREMAP if !ARCH_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -168,6 +169,9 @@ config MACH_LOONGSON32 config MACH_LOONGSON64 def_bool 64BIT =20 +config FIX_EARLYCON_MEM + def_bool y + config PAGE_SIZE_4KB bool =20 @@ -421,6 +425,9 @@ config SECCOMP =20 endmenu =20 +config ARCH_IOREMAP + bool + config ARCH_SELECT_MEMORY_MODEL def_bool y =20 diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/a= sm/fixmap.h index b3541dfa2013..d2e55ae55bb9 100644 --- a/arch/loongarch/include/asm/fixmap.h +++ b/arch/loongarch/include/asm/fixmap.h @@ -10,4 +10,19 @@ =20 #define NR_FIX_BTMAPS 64 =20 +enum fixed_addresses { + FIX_HOLE, + FIX_EARLYCON_MEM_BASE, + __end_of_fixed_addresses +}; + +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXMAP_PAGE_IO PAGE_KERNEL_SUC + +extern void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#include + #endif diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/i= o.h index 999944ea1cea..87a88eb792c1 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -27,71 +27,38 @@ extern void __init early_iounmap(void __iomem *addr, un= signed long size); #define early_memremap early_ioremap #define early_memunmap early_iounmap =20 +#ifdef CONFIG_ARCH_IOREMAP + static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long= size, unsigned long prot_val) { - if (prot_val =3D=3D _CACHE_CC) + if (prot_val & _CACHE_CC) return (void __iomem *)(unsigned long)(CACHE_BASE + offset); else return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset); } =20 -/* - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ -#define ioremap(offset, size) \ - ioremap_prot((offset), (size), _CACHE_SUC) +#define ioremap(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC)) =20 -/* - * ioremap_wc - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_wc performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * but accelerated by means of write-combining feature. It is specifically - * useful for PCIe prefetchable windows, which may vastly improve a - * communications performance. If it was determined on boot stage, what - * CPU CCA doesn't support WUC, the method shall fall-back to the - * _CACHE_SUC option (see cpu_probe() method). - */ -#define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), _CACHE_WUC) +#define iounmap(addr) do { } while (0) + +#endif =20 /* - * ioremap_cache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_cache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. + * On LoongArch, ioremap() has two variants, ioremap_wc() and ioremap_cach= e(). + * They map bus memory into CPU space, the mapped memory is marked uncacha= ble + * (_CACHE_SUC), uncachable but accelerated by write-combine (_CACHE_WUC) = and + * cachable (_CACHE_CC) respectively for CPU access. * - * This version of ioremap ensures that the memory is marked cachable by - * the CPU. Also enables full write-combining. Useful for some - * memory-like regions on I/O busses. + * @offset: bus address of the memory + * @size: size of the resource to map */ -#define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), _CACHE_CC) +#define ioremap_wc(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC)) =20 -static inline void iounmap(const volatile void __iomem *addr) -{ -} +#define ioremap_cache(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) =20 #define mmiowb() asm volatile ("dbar 0" ::: "memory") =20 diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/inc= lude/asm/pgtable-bits.h index 9ca147a29bab..3d1e0a69975a 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -83,8 +83,11 @@ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_SUC) #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE = | \ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) + #ifndef __ASSEMBLY__ =20 +#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) + #define pgprot_noncached pgprot_noncached =20 static inline pgprot_t pgprot_noncached(pgprot_t _prot) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 2b4e98170bc1..f938aae3e92c 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -349,10 +349,10 @@ void __init setup_arch(char **cmdline_p) =20 init_environ(); memblock_init(); + pagetable_init(); parse_early_param(); =20 platform_init(); - pagetable_init(); arch_mem_init(cmdline_p); =20 resource_init(); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 88c935344034..63e19b7a6644 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -201,6 +201,70 @@ void vmemmap_free(unsigned long start, unsigned long e= nd, #endif #endif =20 +static pte_t *fixmap_pte(unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd =3D pgd_offset_k(addr); + p4d =3D p4d_offset(pgd, addr); + + if (pgd_none(*pgd)) { + pud_t *new; + + new =3D memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init(new); +#endif + } + + pud =3D pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new =3D memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init(new); +#endif + } + + pmd =3D pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new; + + new =3D memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } + + return pte_offset_kernel(pmd, addr); +} + +void __init __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr =3D __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <=3D FIX_HOLE || idx >=3D __end_of_fixed_addresses); + + ptep =3D fixmap_pte(addr); + if (!pte_none(*ptep)) { + pte_ERROR(*ptep); + return; + } + + if (pgprot_val(flags)) + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + else { + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } +} + /* * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index bf921487333c..ac18ca7a900a 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -82,6 +82,69 @@ static int acpi_prepare_root_resources(struct acpi_pci_r= oot_info *ci) return 0; } =20 +/* + * Create a PCI config space window + * - reserve mem region + * - alloc struct pci_config_window with space for all mappings + * - ioremap the config space + */ +struct pci_config_window *arch_pci_ecam_create(struct device *dev, + struct resource *cfgres, struct resource *busr, const struct pci_ecam_op= s *ops) +{ + int bsz, bus_range, err; + struct resource *conflict; + struct pci_config_window *cfg; + + if (busr->start > busr->end) + return ERR_PTR(-EINVAL); + + cfg =3D kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return ERR_PTR(-ENOMEM); + + cfg->parent =3D dev; + cfg->ops =3D ops; + cfg->busr.start =3D busr->start; + cfg->busr.end =3D busr->end; + cfg->busr.flags =3D IORESOURCE_BUS; + bus_range =3D resource_size(cfgres) >> ops->bus_shift; + + bsz =3D 1 << ops->bus_shift; + + cfg->res.start =3D cfgres->start; + cfg->res.end =3D cfgres->end; + cfg->res.flags =3D IORESOURCE_MEM | IORESOURCE_BUSY; + cfg->res.name =3D "PCI ECAM"; + + conflict =3D request_resource_conflict(&iomem_resource, &cfg->res); + if (conflict) { + err =3D -EBUSY; + dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n", + &cfg->res, conflict->name, conflict); + goto err_exit; + } + + cfg->win =3D pci_remap_cfgspace(cfgres->start, bus_range * bsz); + if (!cfg->win) + goto err_exit_iomap; + + if (ops->init) { + err =3D ops->init(cfg); + if (err) + goto err_exit; + } + dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr); + + return cfg; + +err_exit_iomap: + err =3D -ENOMEM; + dev_err(dev, "ECAM ioremap failed\n"); +err_exit: + pci_ecam_free(cfg); + return ERR_PTR(err); +} + /* * Lookup the bus range for the domain in MCFG, and set up config space * mapping. @@ -106,11 +169,16 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *roo= t) =20 bus_shift =3D ecam_ops->bus_shift ? : 20; =20 - cfgres.start =3D root->mcfg_addr + (bus_res->start << bus_shift); - cfgres.end =3D cfgres.start + (resource_size(bus_res) << bus_shift) - 1; - cfgres.flags =3D IORESOURCE_MEM; + if (bus_shift =3D=3D 20) + cfg =3D pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + else { + cfgres.start =3D root->mcfg_addr + (bus_res->start << bus_shift); + cfgres.end =3D cfgres.start + (resource_size(bus_res) << bus_shift) - 1; + cfgres.end |=3D BIT(28) + (((PCI_CFG_SPACE_EXP_SIZE - 1) & 0xf00) << 16); + cfgres.flags =3D IORESOURCE_MEM; + cfg =3D arch_pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + } =20 - cfg =3D pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); if (IS_ERR(cfg)) { dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(= cfg)); return NULL; --=20 2.31.1