Zone device pages are used to represent various types of device memory
managed by device drivers. Currently compound zone device pages are not
supported. This is because MEMORY_DEVICE_FS_DAX pages are the only user
of higher-order zone device pages and have their own page reference
counting.

A future change will unify FS DAX reference counting with normal page
reference counting rules and remove the special FS DAX reference
counting. Supporting that requires compound zone device pages.

Supporting compound zone device pages requires compound_head() to
distinguish between head and tail pages whilst still preserving the
special struct page fields that are specific to zone device pages.

A tail page is distinguished by having bit zero set in
page->compound_head, with the remaining bits pointing to the head page.
For zone device pages page->compound_head is shared with page->pgmap.

The page->pgmap field is common to all pages within a memory section.
Therefore pgmap is the same for both head and tail pages and can be
moved into the folio, allowing the standard scheme to be used to find
compound_head from a tail page.
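
As an aside, the standard scheme referred to above works roughly as in
the sketch below. The helper name is made up for illustration only; the
real compound_head() in page-flags.h is what actually implements this
(with extra handling for fake heads):

	static inline struct page *sketch_compound_head(struct page *page)
	{
		unsigned long head = READ_ONCE(page->compound_head);

		/* Bit zero set means "tail"; the rest is the head pointer. */
		if (head & 1)
			return (struct page *)(head - 1);

		return page;
	}

With pgmap stored in the folio, a tail page no longer needs its own
copy of the pointer; it reaches the pgmap through its head page.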
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
Changes since v1:
- Move pgmap to the folio as suggested by Matthew Wilcox
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c |  3 ++-
 drivers/pci/p2pdma.c                   |  6 +++---
 include/linux/memremap.h               |  6 +++---
 include/linux/migrate.h                |  4 ++--
 include/linux/mm_types.h               |  9 +++++++--
 include/linux/mmzone.h                 |  8 +++++++-
 lib/test_hmm.c                         |  3 ++-
 mm/hmm.c                               |  2 +-
 mm/memory.c                            |  4 +++-
 mm/memremap.c                          | 14 +++++++-------
 mm/migrate_device.c                    |  7 +++++--
 mm/mm_init.c                           |  2 +-
 12 files changed, 43 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 6fb65b0..58d308c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -88,7 +88,8 @@ struct nouveau_dmem {
static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
{
- return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
+ return container_of(page_dev_pagemap(page), struct nouveau_dmem_chunk,
+ pagemap);
}
static struct nouveau_drm *page_to_drm(struct page *page)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 210b9f4..a58f2c1 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -199,7 +199,7 @@ static const struct attribute_group p2pmem_group = {
static void p2pdma_page_free(struct page *page)
{
- struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
+ struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_dev_pagemap(page));
/* safe to dereference while a reference is held to the percpu ref */
struct pci_p2pdma *p2pdma =
rcu_dereference_protected(pgmap->provider->p2pdma, 1);
@@ -1022,8 +1022,8 @@ enum pci_p2pdma_map_type
pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
struct scatterlist *sg)
{
- if (state->pgmap != sg_page(sg)->pgmap) {
- state->pgmap = sg_page(sg)->pgmap;
+ if (state->pgmap != page_dev_pagemap(sg_page(sg))) {
+ state->pgmap = page_dev_pagemap(sg_page(sg));
state->map = pci_p2pdma_map_type(state->pgmap, dev);
state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
}
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 3f7143a..14273e6 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page)
{
return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+ page_dev_pagemap(page)->type == MEMORY_DEVICE_PRIVATE;
}
static inline bool folio_is_device_private(const struct folio *folio)
@@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
{
return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+ page_dev_pagemap(page)->type == MEMORY_DEVICE_PCI_P2PDMA;
}
static inline bool is_device_coherent_page(const struct page *page)
{
return is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_COHERENT;
+ page_dev_pagemap(page)->type == MEMORY_DEVICE_COHERENT;
}
static inline bool folio_is_device_coherent(const struct folio *folio)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 002e49b..9a85a82 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -207,8 +207,8 @@ struct migrate_vma {
unsigned long end;
/*
- * Set to the owner value also stored in page->pgmap->owner for
- * migrating out of device private memory. The flags also need to
+ * Set to the owner value also stored in page_dev_pagemap(page)->owner
+ * for migrating out of device private memory. The flags also need to
* be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
* The caller should always set this field when using mmu notifier
* callbacks to avoid device MMU invalidations for device private
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8..c2f1d53 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -129,8 +129,11 @@ struct page {
unsigned long compound_head; /* Bit zero is set */
};
struct { /* ZONE_DEVICE pages */
- /** @pgmap: Points to the hosting device page map. */
- struct dev_pagemap *pgmap;
+ /*
+ * The first word is used for compound_head or folio
+ * pgmap
+ */
+ void *_unused;
void *zone_device_data;
/*
* ZONE_DEVICE private pages are counted as being
@@ -299,6 +302,7 @@ typedef struct {
* @_refcount: Do not access this member directly. Use folio_ref_count()
* to find how many references there are to this folio.
* @memcg_data: Memory Control Group data.
+ * @pgmap: Metadata for ZONE_DEVICE mappings
* @virtual: Virtual address in the kernel direct map.
* @_last_cpupid: IDs of last CPU and last process that accessed the folio.
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
@@ -337,6 +341,7 @@ struct folio {
/* private: */
};
/* public: */
+ struct dev_pagemap *pgmap;
};
struct address_space *mapping;
pgoff_t index;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 17506e4..e191434 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1134,6 +1134,12 @@ static inline bool is_zone_device_page(const struct page *page)
return page_zonenum(page) == ZONE_DEVICE;
}
+static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
+{
+ WARN_ON(!is_zone_device_page(page));
+ return page_folio(page)->pgmap;
+}
+
/*
* Consecutive zone device pages should not be merged into the same sgl
* or bvec segment with other types of pages or if they belong to different
@@ -1149,7 +1155,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
return false;
if (!is_zone_device_page(a))
return true;
- return a->pgmap == b->pgmap;
+ return page_dev_pagemap(a) == page_dev_pagemap(b);
}
extern void memmap_init_zone_device(struct zone *, unsigned long,
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 056f2e4..b072ca9 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -195,7 +195,8 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp)
static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
- return container_of(page->pgmap, struct dmirror_chunk, pagemap);
+ return container_of(page_dev_pagemap(page), struct dmirror_chunk,
+ pagemap);
}
static struct dmirror_device *dmirror_page_to_device(struct page *page)
diff --git a/mm/hmm.c b/mm/hmm.c
index 7e0229a..a11807c 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -248,7 +248,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* just report the PFN.
*/
if (is_device_private_entry(entry) &&
- pfn_swap_entry_to_page(entry)->pgmap->owner ==
+ page_dev_pagemap(pfn_swap_entry_to_page(entry))->owner ==
range->dev_private_owner) {
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
diff --git a/mm/memory.c b/mm/memory.c
index c31ea30..d2785fb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4024,6 +4024,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->page = pfn_swap_entry_to_page(entry);
ret = remove_device_exclusive_entry(vmf);
} else if (is_device_private_entry(entry)) {
+ struct dev_pagemap *pgmap;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
/*
* migrate_to_ram is not yet ready to operate
@@ -4048,7 +4049,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
*/
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
- ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
+ pgmap = page_dev_pagemap(vmf->page);
+ ret = pgmap->ops->migrate_to_ram(vmf);
put_page(vmf->page);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;
diff --git a/mm/memremap.c b/mm/memremap.c
index 07bbe0e..e885bc9 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -458,8 +458,8 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap);
void free_zone_device_folio(struct folio *folio)
{
- if (WARN_ON_ONCE(!folio->page.pgmap->ops ||
- !folio->page.pgmap->ops->page_free))
+ if (WARN_ON_ONCE(!folio->pgmap->ops ||
+ !folio->pgmap->ops->page_free))
return;
mem_cgroup_uncharge(folio);
@@ -486,12 +486,12 @@ void free_zone_device_folio(struct folio *folio)
* to clear folio->mapping.
*/
folio->mapping = NULL;
- folio->page.pgmap->ops->page_free(folio_page(folio, 0));
+ folio->pgmap->ops->page_free(folio_page(folio, 0));
- switch (folio->page.pgmap->type) {
+ switch (folio->pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
case MEMORY_DEVICE_COHERENT:
- put_dev_pagemap(folio->page.pgmap);
+ put_dev_pagemap(folio->pgmap);
break;
case MEMORY_DEVICE_FS_DAX:
@@ -514,7 +514,7 @@ void zone_device_page_init(struct page *page)
* Drivers shouldn't be allocating pages after calling
* memunmap_pages().
*/
- WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
+ WARN_ON_ONCE(!percpu_ref_tryget_live(&page_dev_pagemap(page)->ref));
set_page_count(page, 1);
lock_page(page);
}
@@ -523,7 +523,7 @@ EXPORT_SYMBOL_GPL(zone_device_page_init);
#ifdef CONFIG_FS_DAX
bool __put_devmap_managed_folio_refs(struct folio *folio, int refs)
{
- if (folio->page.pgmap->type != MEMORY_DEVICE_FS_DAX)
+ if (folio->pgmap->type != MEMORY_DEVICE_FS_DAX)
return false;
/*
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 6d66dc1..9d30107 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -106,6 +106,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
arch_enter_lazy_mmu_mode();
for (; addr < end; addr += PAGE_SIZE, ptep++) {
+ struct dev_pagemap *pgmap;
unsigned long mpfn = 0, pfn;
struct folio *folio;
struct page *page;
@@ -133,9 +134,10 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
goto next;
page = pfn_swap_entry_to_page(entry);
+ pgmap = page_dev_pagemap(page);
if (!(migrate->flags &
MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
- page->pgmap->owner != migrate->pgmap_owner)
+ pgmap->owner != migrate->pgmap_owner)
goto next;
mpfn = migrate_pfn(page_to_pfn(page)) |
@@ -151,12 +153,13 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
goto next;
}
page = vm_normal_page(migrate->vma, addr, pte);
+ pgmap = page_dev_pagemap(page);
if (page && !is_zone_device_page(page) &&
!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
goto next;
else if (page && is_device_coherent_page(page) &&
(!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
- page->pgmap->owner != migrate->pgmap_owner))
+ pgmap->owner != migrate->pgmap_owner))
goto next;
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 0489820..3d0611e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -996,7 +996,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
* and zone_device_data. It is a bug if a ZONE_DEVICE page is
* ever freed or placed on a driver-private list.
*/
- page->pgmap = pgmap;
+ page_folio(page)->pgmap = pgmap;
page->zone_device_data = NULL;
/*
--
git-series 0.9.1
Alistair Popple wrote:
> Zone device pages are used to represent various types of device memory
> managed by device drivers. Currently compound zone device pages are
> not supported.
[...]
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
[...]
>  static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
>  {
> -	return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
> +	return container_of(page_dev_pagemap(page), struct nouveau_dmem_chunk,
> +			    pagemap);

page_dev_pagemap() feels like a mouthful. I would be ok with page_pgmap()
since that is the most common identifier for struct dev_pagemap instances.

> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
[...]
>  	struct { /* ZONE_DEVICE pages */
> -		/** @pgmap: Points to the hosting device page map. */
> -		struct dev_pagemap *pgmap;
> +		/*
> +		 * The first word is used for compound_head or folio
> +		 * pgmap
> +		 */
> +		void *_unused;

I would feel better with "_unused_pgmap_compound_head", similar to how
_unused_slab_obj_exts in 'struct folio' indicates the placeholder
contents.

> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
[...]
> +static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
> +{
> +	WARN_ON(!is_zone_device_page(page));

VM_WARN_ON()?

With the above fixups:

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Hi Alistair,

kernel test robot noticed the following build errors:

[auto build test ERROR on 6f1833b8208c3b9e59eff10792667b6639365146]

url:    https://github.com/intel-lab-lkp/linux/commits/Alistair-Popple/mm-gup-c-Remove-redundant-check-for-PCI-P2PDMA-page/20240910-121806
base:   6f1833b8208c3b9e59eff10792667b6639365146
patch link:    https://lore.kernel.org/r/c7026449473790e2844bb82012216c57047c7639.1725941415.git-series.apopple%40nvidia.com
patch subject: [PATCH 04/12] mm: Allow compound zone device pages
config: csky-defconfig (https://download.01.org/0day-ci/archive/20240912/202409122024.PPIwP6vb-lkp@intel.com/config)
compiler: csky-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240912/202409122024.PPIwP6vb-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409122024.PPIwP6vb-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/mm.h:32,
                    from mm/gup.c:7:
   include/linux/memremap.h: In function 'is_device_private_page':
   include/linux/memremap.h:164:17: error: implicit declaration of function 'page_dev_pagemap' [-Wimplicit-function-declaration]
   include/linux/memremap.h:164:39: error: invalid type argument of '->' (have 'int')
   include/linux/memremap.h: In function 'is_pci_p2pdma_page':
   include/linux/memremap.h:176:39: error: invalid type argument of '->' (have 'int')
   include/linux/memremap.h: In function 'is_device_coherent_page':
   include/linux/memremap.h:182:39: error: invalid type argument of '->' (have 'int')
   include/linux/memremap.h: In function 'is_pci_p2pdma_page':
>> include/linux/memremap.h:177:1: warning: control reaches end of non-void function [-Wreturn-type]
   include/linux/memremap.h: In function 'is_device_coherent_page':
   include/linux/memremap.h:183:1: warning: control reaches end of non-void function [-Wreturn-type]
--
   In file included from include/linux/mm.h:32,
                    from mm/memory.c:44:
   include/linux/memremap.h: In function 'is_device_private_page':
   include/linux/memremap.h:164:17: error: implicit declaration of function 'page_dev_pagemap' [-Wimplicit-function-declaration]
   include/linux/memremap.h:164:39: error: invalid type argument of '->' (have 'int')
   include/linux/memremap.h: In function 'is_pci_p2pdma_page':
   include/linux/memremap.h:176:39: error: invalid type argument of '->' (have 'int')
   include/linux/memremap.h: In function 'is_device_coherent_page':
   include/linux/memremap.h:182:39: error: invalid type argument of '->' (have 'int')
   mm/memory.c: In function 'do_swap_page':
>> mm/memory.c:4052:31: error: assignment to 'struct dev_pagemap *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
    4052 |                         pgmap = page_dev_pagemap(vmf->page);
   include/linux/memremap.h: In function 'is_device_private_page':
   include/linux/memremap.h:165:1: warning: control reaches end of non-void function [-Wreturn-type]

vim +4052 mm/memory.c

[...]
  4046			/*
  4047			 * Get a page reference while we know the page can't be
  4048			 * freed.
  4049			 */
  4050			get_page(vmf->page);
  4051			pte_unmap_unlock(vmf->pte, vmf->ptl);
> 4052			pgmap = page_dev_pagemap(vmf->page);
  4053			ret = pgmap->ops->migrate_to_ram(vmf);
  4054			put_page(vmf->page);
[...]

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Alistair,

kernel test robot noticed the following build errors:

[auto build test ERROR on 6f1833b8208c3b9e59eff10792667b6639365146]

url:    https://github.com/intel-lab-lkp/linux/commits/Alistair-Popple/mm-gup-c-Remove-redundant-check-for-PCI-P2PDMA-page/20240910-121806
base:   6f1833b8208c3b9e59eff10792667b6639365146
patch link:    https://lore.kernel.org/r/c7026449473790e2844bb82012216c57047c7639.1725941415.git-series.apopple%40nvidia.com
patch subject: [PATCH 04/12] mm: Allow compound zone device pages
config: um-allnoconfig (https://download.01.org/0day-ci/archive/20240912/202409122055.AMlMSljd-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240912/202409122055.AMlMSljd-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409122055.AMlMSljd-lkp@intel.com/

All errors (new ones prefixed by >>):

   [...]
   include/linux/signal.h:163:1: warning: array index 2 is past the end of the array (that has type 'unsigned long[2]') [-Warray-bounds]
   include/linux/signal.h:187:1: warning: array index 3 is past the end of the array (that has type 'unsigned long[2]') [-Warray-bounds]
   include/linux/mman.h:158:9: warning: division by zero is undefined [-Wdivision-by-zero]
   include/linux/mman.h:159:9: warning: division by zero is undefined [-Wdivision-by-zero]
>> mm/memory.c:4052:12: error: call to undeclared function 'page_dev_pagemap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    4052 |                         pgmap = page_dev_pagemap(vmf->page);
>> mm/memory.c:4052:10: error: incompatible integer to pointer conversion assigning to 'struct dev_pagemap *' from 'int' [-Wint-conversion]
    4052 |                         pgmap = page_dev_pagemap(vmf->page);
   42 warnings and 8 errors generated.

vim +/page_dev_pagemap +4052 mm/memory.c

[...]
  4050			get_page(vmf->page);
  4051			pte_unmap_unlock(vmf->pte, vmf->ptl);
> 4052			pgmap = page_dev_pagemap(vmf->page);
  4053			ret = pgmap->ops->migrate_to_ram(vmf);
  4054			put_page(vmf->page);
[...]

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Tue, Sep 10, 2024 at 02:14:29PM +1000, Alistair Popple wrote:
> @@ -337,6 +341,7 @@ struct folio {
>  /* private: */
>  	};
>  /* public: */
> +	struct dev_pagemap *pgmap;

Shouldn't that be indented by one more tab stop?

And for ease of reading, perhaps it should be placed either immediately
before or after 'struct list_head lru;'?

> +++ b/include/linux/mmzone.h
> @@ -1134,6 +1134,12 @@ static inline bool is_zone_device_page(const struct page *page)
>  	return page_zonenum(page) == ZONE_DEVICE;
>  }
>
> +static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
> +{
> +	WARN_ON(!is_zone_device_page(page));
> +	return page_folio(page)->pgmap;
> +}

I haven't read to the end yet, but presumably we'll eventually want:

static inline struct dev_pagemap *folio_dev_pagemap(const struct folio *folio)
{
	WARN_ON(!folio_is_zone_device(folio))
	return folio->pgmap;
}

and since we'll want it eventually, maybe now is the time to add it,
and make page_dev_pagemap() simply call it?
Matthew Wilcox <willy@infradead.org> writes:

> On Tue, Sep 10, 2024 at 02:14:29PM +1000, Alistair Popple wrote:
>> +static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
>> +{
>> +	WARN_ON(!is_zone_device_page(page));
>> +	return page_folio(page)->pgmap;
>> +}
>
> I haven't read to the end yet, but presumably we'll eventually want:
>
> static inline struct dev_pagemap *folio_dev_pagemap(const struct folio *folio)
> {
> 	WARN_ON(!folio_is_zone_device(folio))
> 	return folio->pgmap;
> }
>
> and since we'll want it eventually, maybe now is the time to add it,
> and make page_dev_pagemap() simply call it?

Sounds reasonable. I had open-coded folio->pgmap where it's needed
because at those points it's "obviously" a ZONE_DEVICE folio. Will add
it.
On Tue, Sep 10, 2024 at 04:57:41PM +1000, Alistair Popple wrote:
> Matthew Wilcox <willy@infradead.org> writes:
>
> > I haven't read to the end yet, but presumably we'll eventually want:
> >
> > static inline struct dev_pagemap *folio_dev_pagemap(const struct folio *folio)
> > {
> > 	WARN_ON(!folio_is_zone_device(folio))
> > 	return folio->pgmap;
> > }
> >
> > and since we'll want it eventually, maybe now is the time to add it,
> > and make page_dev_pagemap() simply call it?
>
> Sounds reasonable. I had open-coded folio->pgmap where it's needed
> because at those points it's "obviously" a ZONE_DEVICE folio. Will add
> it.

Oh, if it's obvious then just do the dereference.
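
Putting the review feedback in this thread together, the helper pair
might end up looking something like the sketch below. The names
page_pgmap() and folio_pgmap() only combine the suggestions above (the
shorter page_pgmap() spelling, the VM_WARN_ON() check, and a folio-level
variant of page_dev_pagemap()); they are not a settled API:

	static inline struct dev_pagemap *folio_pgmap(const struct folio *folio)
	{
		/* Only ZONE_DEVICE folios carry a pgmap. */
		VM_WARN_ON(!folio_is_zone_device(folio));
		return folio->pgmap;
	}

	static inline struct dev_pagemap *page_pgmap(const struct page *page)
	{
		/* Tail pages reach the pgmap via their head page's folio. */
		return folio_pgmap(page_folio(page));
	}

Callers that already hold a folio known to be ZONE_DEVICE can, as noted
above, simply dereference folio->pgmap directly.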