A future change will remove device private pages from the physical
address space. This will mean that device private pages no longer have
pfns and must be handled separately.
When migrating a device private page a migration entry is created for
that page - this includes the pfn for that page. Once device private
pages begin using device memory offsets instead of pfns we will need to
be able to determine which kind of value is in the entry so we can
associate it with the correct page.
Introduce new swap types:
- SWP_MIGRATION_DEVICE_READ
- SWP_MIGRATION_DEVICE_WRITE
- SWP_MIGRATION_DEVICE_READ_EXCLUSIVE
These correspond to
- SWP_MIGRATION_READ
- SWP_MIGRATION_WRITE
- SWP_MIGRATION_READ_EXCLUSIVE
except the swap entry contains a device private offset.
The SWP_MIGRATION_DEVICE swap types are treated as specializations of
the SWP_MIGRATION types. That is, the existing helpers such as
is_writable_migration_entry() will still return true for a
SWP_MIGRATION_DEVICE_WRITE entry. Likewise, the
make_*_migration_entry_from_page() helpers will create either a
SWP_MIGRATION_DEVICE or a SWP_MIGRATION type as the page requires.
Introduce new helpers such as
is_writable_device_migration_private_entry() to disambiguate between a
SWP_MIGRATION_WRITE and a SWP_MIGRATION_DEVICE_WRITE entry.
Introduce corresponding softleaf types and helpers.
Signed-off-by: Jordan Niethe <jniethe@nvidia.com>
Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
v1:
- Update for softleaf infrastructure
- Handle make_readable_migration_entry_from_page() and friends
- s/make_device_migration_readable_exclusive_migration_entry/make_readable_exclusive_migration_device_private_entry
- s/is_device_migration_readable_exclusive_entry/is_readable_exclusive_device_private_migration_entry/
v2:
- Add softleaf_is_migration_device_private_read()
---
include/linux/leafops.h | 86 +++++++++++++++++++++++++++++++++++++----
include/linux/swap.h | 8 +++-
include/linux/swapops.h | 79 +++++++++++++++++++++++++++++++++++++
3 files changed, 164 insertions(+), 9 deletions(-)
diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index 2fde8208da13..2fa09ffe9e34 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -28,6 +28,9 @@ enum softleaf_type {
SOFTLEAF_DEVICE_PRIVATE_READ,
SOFTLEAF_DEVICE_PRIVATE_WRITE,
SOFTLEAF_DEVICE_EXCLUSIVE,
+ SOFTLEAF_MIGRATION_DEVICE_READ,
+ SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE,
+ SOFTLEAF_MIGRATION_DEVICE_WRITE,
/* H/W posion types. */
SOFTLEAF_HWPOISON,
/* Marker types. */
@@ -165,6 +168,12 @@ static inline enum softleaf_type softleaf_type(softleaf_t entry)
return SOFTLEAF_DEVICE_PRIVATE_READ;
case SWP_DEVICE_EXCLUSIVE:
return SOFTLEAF_DEVICE_EXCLUSIVE;
+ case SWP_MIGRATION_DEVICE_READ:
+ return SOFTLEAF_MIGRATION_DEVICE_READ;
+ case SWP_MIGRATION_DEVICE_WRITE:
+ return SOFTLEAF_MIGRATION_DEVICE_WRITE;
+ case SWP_MIGRATION_DEVICE_READ_EXCLUSIVE:
+ return SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE;
#endif
#ifdef CONFIG_MEMORY_FAILURE
case SWP_HWPOISON:
@@ -190,16 +199,75 @@ static inline bool softleaf_is_swap(softleaf_t entry)
return softleaf_type(entry) == SOFTLEAF_SWAP;
}
+/**
+ * softleaf_is_migration_device_private() - Is this leaf entry a migration
+ * device private entry?
+ * @entry: Leaf entry.
+ *
+ * Returns: true if the leaf entry is a migration device private entry, otherwise false.
+ */
+static inline bool softleaf_is_migration_device_private(softleaf_t entry)
+{
+ switch (softleaf_type(entry)) {
+ case SOFTLEAF_MIGRATION_DEVICE_READ:
+ case SOFTLEAF_MIGRATION_DEVICE_WRITE:
+ case SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * softleaf_is_migration_device_private_write() - Is this leaf entry a writable
+ * device private migration entry?
+ * @entry: Leaf entry.
+ *
+ * Returns: true if the leaf entry is a writable device private migration entry,
+ * otherwise false.
+ */
+static inline bool softleaf_is_migration_device_private_write(softleaf_t entry)
+{
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_DEVICE_WRITE;
+}
+
+/**
+ * softleaf_is_migration_device_private_read() - Is this leaf entry a readable
+ * device private migration entry?
+ * @entry: Leaf entry.
+ *
+ * Returns: true if the leaf entry is a readable device private migration
+ * entry, otherwise false.
+ */
+static inline bool softleaf_is_migration_device_private_read(softleaf_t entry)
+{
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_DEVICE_READ;
+}
+
+/**
+ * softleaf_is_migration_device_private_read_exclusive() - Is this leaf entry an
+ * exclusive readable device private migration entry?
+ * @entry: Leaf entry.
+ *
+ * Returns: true if the leaf entry is an exclusive readable device private
+ * migration entry, otherwise false.
+ */
+static inline bool softleaf_is_migration_device_private_read_exclusive(softleaf_t entry)
+{
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE;
+}
+
/**
* softleaf_is_migration_write() - Is this leaf entry a writable migration entry?
* @entry: Leaf entry.
*
- * Returns: true if the leaf entry is a writable migration entry, otherwise
- * false.
+ * Returns: true if the leaf entry is a writable migration entry or a writable
+ * device private migration entry, otherwise false.
*/
static inline bool softleaf_is_migration_write(softleaf_t entry)
{
- return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE;
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE ||
+ softleaf_is_migration_device_private_write(entry);
}
/**
@@ -211,7 +279,8 @@ static inline bool softleaf_is_migration_write(softleaf_t entry)
*/
static inline bool softleaf_is_migration_read(softleaf_t entry)
{
- return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ;
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ ||
+ softleaf_is_migration_device_private_read(entry);
}
/**
@@ -219,12 +288,13 @@ static inline bool softleaf_is_migration_read(softleaf_t entry)
* readable migration entry?
* @entry: Leaf entry.
*
- * Returns: true if the leaf entry is an exclusive readable migration entry,
- * otherwise false.
+ * Returns: true if the leaf entry is an exclusive readable migration entry or
+ * exclusive readable device private migration entry, otherwise false.
*/
static inline bool softleaf_is_migration_read_exclusive(softleaf_t entry)
{
- return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE;
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE ||
+ softleaf_is_migration_device_private_read_exclusive(entry);
}
/**
@@ -241,7 +311,7 @@ static inline bool softleaf_is_migration(softleaf_t entry)
case SOFTLEAF_MIGRATION_WRITE:
return true;
default:
- return false;
+ return softleaf_is_migration_device_private(entry);
}
}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 38ca3df68716..c15e3b3067cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -74,12 +74,18 @@ static inline int current_is_kswapd(void)
*
* When a page is mapped by the device for exclusive access we set the CPU page
* table entries to a special SWP_DEVICE_EXCLUSIVE entry.
+ *
+ * Because device private pages do not use regular PFNs, special migration
+ * entries are also needed.
*/
#ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 3
+#define SWP_DEVICE_NUM 6
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
+#define SWP_MIGRATION_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
+#define SWP_MIGRATION_DEVICE_READ_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+4)
+#define SWP_MIGRATION_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+5)
#else
#define SWP_DEVICE_NUM 0
#endif
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index a9ad997bd5ec..bae76d3831fb 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -148,6 +148,43 @@ static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset)
return swp_entry(SWP_DEVICE_EXCLUSIVE, offset);
}
+static inline swp_entry_t make_readable_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_MIGRATION_DEVICE_READ, offset);
+}
+
+static inline swp_entry_t make_writable_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_MIGRATION_DEVICE_WRITE, offset);
+}
+
+static inline bool is_device_private_migration_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ ||
+ swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE ||
+ swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
+}
+
+static inline bool is_readable_device_migration_private_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ);
+}
+
+static inline bool is_writable_device_migration_private_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
+}
+
+static inline swp_entry_t make_readable_exclusive_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_MIGRATION_DEVICE_READ_EXCLUSIVE, offset);
+}
+
+static inline bool is_readable_exclusive_device_private_migration_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE;
+}
+
#else /* CONFIG_DEVICE_PRIVATE */
static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
@@ -164,6 +201,36 @@ static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset)
return swp_entry(0, 0);
}
+static inline swp_entry_t make_readable_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(0, 0);
+}
+
+static inline swp_entry_t make_writable_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(0, 0);
+}
+
+static inline bool is_device_private_migration_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline bool is_writable_device_migration_private_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline swp_entry_t make_readable_exclusive_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(0, 0);
+}
+
+static inline bool is_readable_exclusive_device_private_migration_entry(swp_entry_t entry)
+{
+ return false;
+}
+
#endif /* CONFIG_DEVICE_PRIVATE */
#ifdef CONFIG_MIGRATION
@@ -175,6 +242,10 @@ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
static inline swp_entry_t make_readable_migration_entry_from_page(struct page *page, pgoff_t flags)
{
+ if (is_device_private_page(page))
+ return make_readable_migration_device_private_entry(
+ page_to_pfn(page) | flags);
+
return swp_entry(SWP_MIGRATION_READ, page_to_pfn(page) | flags);
}
@@ -186,6 +257,10 @@ static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset
static inline swp_entry_t make_readable_exclusive_migration_entry_from_page(struct page *page,
pgoff_t flags)
{
+ if (is_device_private_page(page))
+ return make_readable_exclusive_migration_device_private_entry(
+ page_to_pfn(page) | flags);
+
return swp_entry(SWP_MIGRATION_READ_EXCLUSIVE, page_to_pfn(page) | flags);
}
@@ -197,6 +272,10 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
static inline swp_entry_t make_writable_migration_entry_from_page(struct page *page,
pgoff_t flags)
{
+ if (is_device_private_page(page))
+ return make_writable_migration_device_private_entry(
+ page_to_pfn(page) | flags);
+
return swp_entry(SWP_MIGRATION_WRITE, page_to_pfn(page) | flags);
}
--
2.34.1
Hi,
I am copying across this discussion that occurred on a resend of this
series on the intel-xe list for a wider audience [0].
[0]
https://lore.kernel.org/all/8bf680f0-94ed-4614-9ace-a081a6558460@nvidia.com/
On 9/1/26 06:08, Lorenzo Stoakes wrote:
>> @@ -28,6 +28,9 @@ enum softleaf_type {
>> SOFTLEAF_DEVICE_PRIVATE_READ,
>> SOFTLEAF_DEVICE_PRIVATE_WRITE,
>> SOFTLEAF_DEVICE_EXCLUSIVE,
>> + SOFTLEAF_MIGRATION_DEVICE_READ,
>> + SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE,
>> + SOFTLEAF_MIGRATION_DEVICE_WRITE,
>
> I think these should be SOFTLEAF_MIGRATION_DEVICE_PRIVATE_xxx
>
> And I realise that's a mouthful 🙂
>
> But to be consistent with other naming, including
SOFTLEAF_DEVICE_PRIVATE_*.
Sure.
>
>
> I don't see any users of this, do you definitely use it? I mean
presumably you
> might in a subsequent patch, not checked.
>
> Otherwise I'd wrap it into softleaf_is_migration().
It gets used in the final patch of the series when we begin doing things
like:
static inline struct page *softleaf_to_page(softleaf_t entry)
{
struct page *page;
if (softleaf_is_migration_device_private(entry) ||
softleaf_is_device_private(entry))
page = device_private_entry_to_page(entry);
else
page = pfn_to_page(softleaf_to_pfn(entry));
>
>>
>> /**
>> @@ -211,7 +279,8 @@ static inline bool
softleaf_is_migration_write(softleaf_t entry)
>> */
>> static inline bool softleaf_is_migration_read(softleaf_t entry)
>> {
>
> For these ones that you are making sort of compound now, can you
please update
> the kdoc to reflect it? You've done it for others but not this one.
>
Sure, sorry I missed this one.
>> - return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ;
>> + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ ||
>> + softleaf_is_migration_device_private_read(entry);
>> }
>>
>> /**
>> @@ -219,12 +288,13 @@ static inline bool
softleaf_is_migration_read(softleaf_t entry)
>> * readable migration entry?
>> * @entry: Leaf entry.
>> *
>> - * Returns: true if the leaf entry is an exclusive readable
migration entry,
>> - * otherwise false.
>> + * Returns: true if the leaf entry is an exclusive readable
migration entry or
>> + * exclusive readable device private migration entry, otherwise false.
>> */
>> static inline bool softleaf_is_migration_read_exclusive(softleaf_t
entry)
>> {
>> - return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE;
>> + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE ||
>> + softleaf_is_migration_device_private_read_exclusive(entry);
>> }
>>
>> /**
>> @@ -241,7 +311,7 @@ static inline bool
softleaf_is_migration(softleaf_t entry)
>> case SOFTLEAF_MIGRATION_WRITE:
>> return true;
>> default:
>> - return false;
>> + return softleaf_is_migration_device_private(entry);
>> }
>> }
>
> So all of these above ^^^ are making it so you can't determine if an
entry is
> 'migration xxx' vs. 'migration xxx device private'. Is this ok?
>
> Does anything need to exclusively determine if something is a 'migration
> xxx'?
>
> If not then fine, but just wanted to check.
The new entry types have the following relationship:
SOFTLEAF_MIGRATION_DEVICE_READ is-a SOFTLEAF_MIGRATION_READ
SOFTLEAF_MIGRATION_READ !is-a SOFTLEAF_MIGRATION_DEVICE_READ
So it remains possible to distinguish the types using the
softleaf_is_migration_device_private_read() check.
In practice, the only reason for introducing this new type is so we know
when we cannot call pfn_to_page() on the swap entry offset. That is the
only time that the difference matters.
Rather than introducing a new type we could accomplish this by adding a new
flag like SWP_MIG_DIRTY, SWP_MIG_YOUNG but my concern was how we handle the
!migration_entry_supports_ad() case.
>
>>
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index 38ca3df68716..c15e3b3067cd 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -74,12 +74,18 @@ static inline int current_is_kswapd(void)
>> *
>> * When a page is mapped by the device for exclusive access we set
the CPU page
>> * table entries to a special SWP_DEVICE_EXCLUSIVE entry.
>> + *
>> + * Because device private pages do not use regular PFNs, special
migration
>> + * entries are also needed.
>> */
>> #ifdef CONFIG_DEVICE_PRIVATE
>> -#define SWP_DEVICE_NUM 3
>> +#define SWP_DEVICE_NUM 6
>> #define SWP_DEVICE_WRITE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
>> #define SWP_DEVICE_READ
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
>> #define SWP_DEVICE_EXCLUSIVE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
>> +#define SWP_MIGRATION_DEVICE_READ
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
>> +#define SWP_MIGRATION_DEVICE_READ_EXCLUSIVE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+4)
>> +#define SWP_MIGRATION_DEVICE_WRITE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+5)
>
> I've lost track on how many entries we have left, have you tested
this with a
> config that maximises the number?
Good point - let me double check that.
>
> I really hate how we do this by the way, that's another thing to
fix... 🙂
>
>> #else
>> #define SWP_DEVICE_NUM 0
>> #endif
>> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
>> index a9ad997bd5ec..bae76d3831fb 100644
>> --- a/include/linux/swapops.h
>> +++ b/include/linux/swapops.h
>
> Since this is pure softleaf stuff can we please put it all in
leafops.h if
> possible? I know we already have some stuff here rather than there,
but this
> really doesn't seem to belong here.
The reason for adding the swapops.h entries to correspond with the softleaf
entries was it looked like the swapops were still required for making
the entries currently.
That is, there aren't softleaf equivalents to
make_readable_device_private_entry() and friends yet.
Would it be better if I introduced the swapops.h changes in a preceding
patch?
>
>
> Cheers, Lorenzo
Thanks for reviewing.
Jordan.
© 2016 - 2026 Red Hat, Inc.