Switch over from rwlock_t to an atomic_t variable that takes a
negative value when the page is under migration, or positive
values when the page is used by zsmalloc users (object map,
etc.) Using a rwsem per-zspage is a little too memory heavy,
a simple atomic_t should suffice, after all we only need to
mark zspage as either used-for-write or used-for-read. This
is needed to make zsmalloc preemptible in the future.
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
1 file changed, 66 insertions(+), 46 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f8..28a75bfbeaa6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
__free_page(page);
}
+#define ZS_PAGE_UNLOCKED 0
+#define ZS_PAGE_WRLOCKED -1
+
struct zspage {
struct {
unsigned int huge:HUGE_BITS;
@@ -269,7 +272,7 @@ struct zspage {
struct zpdesc *first_zpdesc;
struct list_head list; /* fullness list */
struct zs_pool *pool;
- rwlock_t lock;
+ atomic_t lock;
};
struct mapping_area {
@@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
return zspage->huge;
}
-static void migrate_lock_init(struct zspage *zspage);
-static void migrate_read_lock(struct zspage *zspage);
-static void migrate_read_unlock(struct zspage *zspage);
-static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_unlock(struct zspage *zspage);
+static void zspage_lock_init(struct zspage *zspage)
+{
+ atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
+
+static void zspage_read_lock(struct zspage *zspage)
+{
+ atomic_t *lock = &zspage->lock;
+ int old;
+
+ while (1) {
+ old = atomic_read(lock);
+ if (old == ZS_PAGE_WRLOCKED) {
+ cpu_relax();
+ continue;
+ }
+
+ if (atomic_cmpxchg(lock, old, old + 1) == old)
+ return;
+
+ cpu_relax();
+ }
+}
+
+static void zspage_read_unlock(struct zspage *zspage)
+{
+ atomic_dec(&zspage->lock);
+}
+
+static void zspage_write_lock(struct zspage *zspage)
+{
+ atomic_t *lock = &zspage->lock;
+ int old;
+
+ while (1) {
+ old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
+ if (old == ZS_PAGE_UNLOCKED)
+ return;
+
+ cpu_relax();
+ }
+}
+
+static void zspage_write_unlock(struct zspage *zspage)
+{
+ atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
#ifdef CONFIG_COMPACTION
static void kick_deferred_free(struct zs_pool *pool);
@@ -992,7 +1037,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
return NULL;
zspage->magic = ZSPAGE_MAGIC;
- migrate_lock_init(zspage);
+ zspage_lock_init(zspage);
for (i = 0; i < class->pages_per_zspage; i++) {
struct zpdesc *zpdesc;
@@ -1217,7 +1262,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
* zs_unmap_object API so delegate the locking from class to zspage
* which is smaller granularity.
*/
- migrate_read_lock(zspage);
+ zspage_read_lock(zspage);
read_unlock(&pool->migrate_lock);
class = zspage_class(pool, zspage);
@@ -1277,7 +1322,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
}
local_unlock(&zs_map_area.lock);
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -1671,18 +1716,18 @@ static void lock_zspage(struct zspage *zspage)
/*
* Pages we haven't locked yet can be migrated off the list while we're
* trying to lock them, so we need to be careful and only attempt to
- * lock each page under migrate_read_lock(). Otherwise, the page we lock
+ * lock each page under zspage_read_lock(). Otherwise, the page we lock
* may no longer belong to the zspage. This means that we may wait for
* the wrong page to unlock, so we must take a reference to the page
- * prior to waiting for it to unlock outside migrate_read_lock().
+ * prior to waiting for it to unlock outside zspage_read_lock().
*/
while (1) {
- migrate_read_lock(zspage);
+ zspage_read_lock(zspage);
zpdesc = get_first_zpdesc(zspage);
if (zpdesc_trylock(zpdesc))
break;
zpdesc_get(zpdesc);
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
zpdesc_wait_locked(zpdesc);
zpdesc_put(zpdesc);
}
@@ -1693,41 +1738,16 @@ static void lock_zspage(struct zspage *zspage)
curr_zpdesc = zpdesc;
} else {
zpdesc_get(zpdesc);
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
zpdesc_wait_locked(zpdesc);
zpdesc_put(zpdesc);
- migrate_read_lock(zspage);
+ zspage_read_lock(zspage);
}
}
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
}
#endif /* CONFIG_COMPACTION */
-static void migrate_lock_init(struct zspage *zspage)
-{
- rwlock_init(&zspage->lock);
-}
-
-static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
-{
- read_lock(&zspage->lock);
-}
-
-static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
-{
- read_unlock(&zspage->lock);
-}
-
-static void migrate_write_lock(struct zspage *zspage)
-{
- write_lock(&zspage->lock);
-}
-
-static void migrate_write_unlock(struct zspage *zspage)
-{
- write_unlock(&zspage->lock);
-}
-
#ifdef CONFIG_COMPACTION
static const struct movable_operations zsmalloc_mops;
@@ -1803,8 +1823,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
* the class lock protects zpage alloc/free in the zspage.
*/
spin_lock(&class->lock);
- /* the migrate_write_lock protects zpage access via zs_map_object */
- migrate_write_lock(zspage);
+ /* the zspage_write_lock protects zpage access via zs_map_object */
+ zspage_write_lock(zspage);
offset = get_first_obj_offset(zpdesc);
s_addr = kmap_local_zpdesc(zpdesc);
@@ -1835,7 +1855,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
*/
write_unlock(&pool->migrate_lock);
spin_unlock(&class->lock);
- migrate_write_unlock(zspage);
+ zspage_write_unlock(zspage);
zpdesc_get(newzpdesc);
if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
@@ -1971,9 +1991,9 @@ static unsigned long __zs_compact(struct zs_pool *pool,
if (!src_zspage)
break;
- migrate_write_lock(src_zspage);
+ zspage_write_lock(src_zspage);
migrate_zspage(pool, src_zspage, dst_zspage);
- migrate_write_unlock(src_zspage);
+ zspage_write_unlock(src_zspage);
fg = putback_zspage(class, src_zspage);
if (fg == ZS_INUSE_RATIO_0) {
--
2.48.1.262.g85cc9f2d1e-goog
On 27. 01. 25 08:59, Sergey Senozhatsky wrote:
> Switch over from rwlock_t to a atomic_t variable that takes
> negative value when the page is under migration, or positive
> values when the page is used by zsmalloc users (object map,
> etc.) Using a rwsem per-zspage is a little too memory heavy,
> a simple atomic_t should suffice, after all we only need to
> mark zspage as either used-for-write or used-for-read. This
> is needed to make zsmalloc preemtible in the future.
>
> Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
> ---
> mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
> 1 file changed, 66 insertions(+), 46 deletions(-)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 817626a351f8..28a75bfbeaa6 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
> __free_page(page);
> }
>
> +#define ZS_PAGE_UNLOCKED 0
> +#define ZS_PAGE_WRLOCKED -1
> +
> struct zspage {
> struct {
> unsigned int huge:HUGE_BITS;
> @@ -269,7 +272,7 @@ struct zspage {
> struct zpdesc *first_zpdesc;
> struct list_head list; /* fullness list */
> struct zs_pool *pool;
> - rwlock_t lock;
> + atomic_t lock;
> };
>
> struct mapping_area {
> @@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
> return zspage->huge;
> }
>
> -static void migrate_lock_init(struct zspage *zspage);
> -static void migrate_read_lock(struct zspage *zspage);
> -static void migrate_read_unlock(struct zspage *zspage);
> -static void migrate_write_lock(struct zspage *zspage);
> -static void migrate_write_unlock(struct zspage *zspage);
> +static void zspage_lock_init(struct zspage *zspage)
> +{
> + atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
> +}
> +
> +static void zspage_read_lock(struct zspage *zspage)
> +{
> + atomic_t *lock = &zspage->lock;
> + int old;
> +
> + while (1) {
> + old = atomic_read(lock);
> + if (old == ZS_PAGE_WRLOCKED) {
> + cpu_relax();
> + continue;
> + }
> +
> + if (atomic_cmpxchg(lock, old, old + 1) == old)
> + return;
You can use atomic_try_cmpxchg() here:
if (atomic_try_cmpxchg(lock, &old, old + 1))
return;
> +
> + cpu_relax();
> + }
> +}
> +
> +static void zspage_read_unlock(struct zspage *zspage)
> +{
> + atomic_dec(&zspage->lock);
> +}
> +
> +static void zspage_write_lock(struct zspage *zspage)
> +{
> + atomic_t *lock = &zspage->lock;
> + int old;
> +
> + while (1) {
> + old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
> + if (old == ZS_PAGE_UNLOCKED)
> + return;
Also, the above code can be rewritten as:
while (1) {
old = ZS_PAGE_UNLOCKED;
if (atomic_try_cmpxchg (lock, &old, ZS_PAGE_WRLOCKED))
return;
> +
> + cpu_relax();
> + }
> +}
The above change will result in slightly better generated asm.
Uros.
On (25/01/27 21:23), Uros Bizjak wrote:
> > +static void zspage_read_lock(struct zspage *zspage)
> > +{
> > + atomic_t *lock = &zspage->lock;
> > + int old;
> > +
> > + while (1) {
> > + old = atomic_read(lock);
> > + if (old == ZS_PAGE_WRLOCKED) {
> > + cpu_relax();
> > + continue;
> > + }
> > +
> > + if (atomic_cmpxchg(lock, old, old + 1) == old)
> > + return;
>
> You can use atomic_try_cmpxchg() here:
>
> if (atomic_try_cmpxchg(lock, &old, old + 1))
> return;
>
> > +
> > + cpu_relax();
> > + }
> > +}
> > +
> > +static void zspage_read_unlock(struct zspage *zspage)
> > +{
> > + atomic_dec(&zspage->lock);
> > +}
> > +
> > +static void zspage_write_lock(struct zspage *zspage)
> > +{
> > + atomic_t *lock = &zspage->lock;
> > + int old;
> > +
> > + while (1) {
> > + old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
> > + if (old == ZS_PAGE_UNLOCKED)
> > + return;
>
> Also, the above code can be rewritten as:
>
> while (1) {
> old = ZS_PAGE_UNLOCKED;
> if (atomic_try_cmpxchg (lock, &old, ZS_PAGE_WRLOCKED))
> return;
> > +
> > + cpu_relax();
> > + }
> > +}
>
> The above change will result in a slightly better generated asm.
Thanks, I'll take a look for the next version.
© 2016 - 2026 Red Hat, Inc.