Switch over from rwlock_t to an atomic_t variable that takes
a negative value when the page is under migration, or positive
values when the page is used by zsmalloc users (object map,
etc.). Using a rwsem per-zspage is a little too memory heavy;
a simple atomic_t should suffice, since after all we only need to
mark a zspage as either used-for-write or used-for-read. This
is needed to make zsmalloc preemptible in the future.
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
1 file changed, 66 insertions(+), 46 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f8..28a75bfbeaa6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
__free_page(page);
}
+/*
+ * Values of zspage->lock: ZS_PAGE_UNLOCKED when nobody holds the lock,
+ * ZS_PAGE_WRLOCKED while a writer holds it, and a positive reader count
+ * otherwise. The negative value is parenthesized so that the macro
+ * expands safely inside any expression.
+ */
+#define ZS_PAGE_UNLOCKED	0
+#define ZS_PAGE_WRLOCKED	(-1)
+
struct zspage {
struct {
unsigned int huge:HUGE_BITS;
@@ -269,7 +272,7 @@ struct zspage {
struct zpdesc *first_zpdesc;
struct list_head list; /* fullness list */
struct zs_pool *pool;
- rwlock_t lock;
+ atomic_t lock;
};
struct mapping_area {
@@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
return zspage->huge;
}
-static void migrate_lock_init(struct zspage *zspage);
-static void migrate_read_lock(struct zspage *zspage);
-static void migrate_read_unlock(struct zspage *zspage);
-static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_unlock(struct zspage *zspage);
+/*
+ * Initialize the per-zspage lock word to ZS_PAGE_UNLOCKED.
+ */
+static void zspage_lock_init(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+
+	atomic_set(lock, ZS_PAGE_UNLOCKED);
+}
+
+/*
+ * Take the zspage lock for reading.
+ *
+ * Busy-waits while the lock word equals ZS_PAGE_WRLOCKED, then
+ * atomically increments it. Any number of readers may hold the lock
+ * at the same time; each one must call zspage_read_unlock().
+ */
+static void zspage_read_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old = atomic_read(lock);
+
+	while (1) {
+		if (old == ZS_PAGE_WRLOCKED) {
+			cpu_relax();
+			old = atomic_read(lock);
+			continue;
+		}
+
+		/*
+		 * On failure atomic_try_cmpxchg() stores the current lock
+		 * value in @old, so the retry needs no separate re-read.
+		 */
+		if (atomic_try_cmpxchg(lock, &old, old + 1))
+			return;
+
+		cpu_relax();
+	}
+}
+
+/*
+ * Drop a read hold on the zspage lock by decrementing the lock word.
+ *
+ * A plain atomic_dec() implies no memory ordering, so accesses made
+ * while holding the lock could be reordered past the decrement and
+ * overlap with a writer that takes the lock right after. Use the
+ * release variant so the critical section is complete before the
+ * lock word changes.
+ */
+static void zspage_read_unlock(struct zspage *zspage)
+{
+	atomic_dec_return_release(&zspage->lock);
+}
+
+/*
+ * Take the zspage lock for writing.
+ *
+ * Busy-waits until the lock word can be switched atomically from
+ * ZS_PAGE_UNLOCKED to ZS_PAGE_WRLOCKED, i.e. until there is neither
+ * a reader nor another writer.
+ */
+static void zspage_write_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old;
+
+	while (1) {
+		/* atomic_try_cmpxchg() overwrites @old on failure, so reset it */
+		old = ZS_PAGE_UNLOCKED;
+		if (atomic_try_cmpxchg(lock, &old, ZS_PAGE_WRLOCKED))
+			return;
+
+		cpu_relax();
+	}
+}
+
+/*
+ * Release the write hold on the zspage lock.
+ *
+ * atomic_set() is an unordered store, so stores made under the lock
+ * could become visible after the lock already looks free. Store
+ * ZS_PAGE_UNLOCKED with release semantics so everything written in
+ * the critical section is visible before the lock word changes.
+ */
+static void zspage_write_unlock(struct zspage *zspage)
+{
+	atomic_set_release(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
#ifdef CONFIG_COMPACTION
static void kick_deferred_free(struct zs_pool *pool);
@@ -992,7 +1037,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
return NULL;
zspage->magic = ZSPAGE_MAGIC;
- migrate_lock_init(zspage);
+ zspage_lock_init(zspage);
for (i = 0; i < class->pages_per_zspage; i++) {
struct zpdesc *zpdesc;
@@ -1217,7 +1262,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
* zs_unmap_object API so delegate the locking from class to zspage
* which is smaller granularity.
*/
- migrate_read_lock(zspage);
+ zspage_read_lock(zspage);
read_unlock(&pool->migrate_lock);
class = zspage_class(pool, zspage);
@@ -1277,7 +1322,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
}
local_unlock(&zs_map_area.lock);
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -1671,18 +1716,18 @@ static void lock_zspage(struct zspage *zspage)
/*
* Pages we haven't locked yet can be migrated off the list while we're
* trying to lock them, so we need to be careful and only attempt to
- * lock each page under migrate_read_lock(). Otherwise, the page we lock
+ * lock each page under zspage_read_lock(). Otherwise, the page we lock
* may no longer belong to the zspage. This means that we may wait for
* the wrong page to unlock, so we must take a reference to the page
- * prior to waiting for it to unlock outside migrate_read_lock().
+ * prior to waiting for it to unlock outside zspage_read_lock().
*/
while (1) {
- migrate_read_lock(zspage);
+ zspage_read_lock(zspage);
zpdesc = get_first_zpdesc(zspage);
if (zpdesc_trylock(zpdesc))
break;
zpdesc_get(zpdesc);
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
zpdesc_wait_locked(zpdesc);
zpdesc_put(zpdesc);
}
@@ -1693,41 +1738,16 @@ static void lock_zspage(struct zspage *zspage)
curr_zpdesc = zpdesc;
} else {
zpdesc_get(zpdesc);
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
zpdesc_wait_locked(zpdesc);
zpdesc_put(zpdesc);
- migrate_read_lock(zspage);
+ zspage_read_lock(zspage);
}
}
- migrate_read_unlock(zspage);
+ zspage_read_unlock(zspage);
}
#endif /* CONFIG_COMPACTION */
-static void migrate_lock_init(struct zspage *zspage)
-{
- rwlock_init(&zspage->lock);
-}
-
-static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
-{
- read_lock(&zspage->lock);
-}
-
-static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
-{
- read_unlock(&zspage->lock);
-}
-
-static void migrate_write_lock(struct zspage *zspage)
-{
- write_lock(&zspage->lock);
-}
-
-static void migrate_write_unlock(struct zspage *zspage)
-{
- write_unlock(&zspage->lock);
-}
-
#ifdef CONFIG_COMPACTION
static const struct movable_operations zsmalloc_mops;
@@ -1803,8 +1823,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
* the class lock protects zpage alloc/free in the zspage.
*/
spin_lock(&class->lock);
- /* the migrate_write_lock protects zpage access via zs_map_object */
- migrate_write_lock(zspage);
+ /* the zspage_write_lock protects zpage access via zs_map_object */
+ zspage_write_lock(zspage);
offset = get_first_obj_offset(zpdesc);
s_addr = kmap_local_zpdesc(zpdesc);
@@ -1835,7 +1855,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
*/
write_unlock(&pool->migrate_lock);
spin_unlock(&class->lock);
- migrate_write_unlock(zspage);
+ zspage_write_unlock(zspage);
zpdesc_get(newzpdesc);
if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
@@ -1971,9 +1991,9 @@ static unsigned long __zs_compact(struct zs_pool *pool,
if (!src_zspage)
break;
- migrate_write_lock(src_zspage);
+ zspage_write_lock(src_zspage);
migrate_zspage(pool, src_zspage, dst_zspage);
- migrate_write_unlock(src_zspage);
+ zspage_write_unlock(src_zspage);
fg = putback_zspage(class, src_zspage);
if (fg == ZS_INUSE_RATIO_0) {
--
2.48.1.262.g85cc9f2d1e-goog
On 27. 01. 25 08:59, Sergey Senozhatsky wrote: > Switch over from rwlock_t to a atomic_t variable that takes > negative value when the page is under migration, or positive > values when the page is used by zsmalloc users (object map, > etc.) Using a rwsem per-zspage is a little too memory heavy, > a simple atomic_t should suffice, after all we only need to > mark zspage as either used-for-write or used-for-read. This > is needed to make zsmalloc preemtible in the future. > > Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> > --- > mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++--------------------- > 1 file changed, 66 insertions(+), 46 deletions(-) > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c > index 817626a351f8..28a75bfbeaa6 100644 > --- a/mm/zsmalloc.c > +++ b/mm/zsmalloc.c > @@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc) > __free_page(page); > } > > +#define ZS_PAGE_UNLOCKED 0 > +#define ZS_PAGE_WRLOCKED -1 > + > struct zspage { > struct { > unsigned int huge:HUGE_BITS; > @@ -269,7 +272,7 @@ struct zspage { > struct zpdesc *first_zpdesc; > struct list_head list; /* fullness list */ > struct zs_pool *pool; > - rwlock_t lock; > + atomic_t lock; > }; > > struct mapping_area { > @@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage) > return zspage->huge; > } > > -static void migrate_lock_init(struct zspage *zspage); > -static void migrate_read_lock(struct zspage *zspage); > -static void migrate_read_unlock(struct zspage *zspage); > -static void migrate_write_lock(struct zspage *zspage); > -static void migrate_write_unlock(struct zspage *zspage); > +static void zspage_lock_init(struct zspage *zspage) > +{ > + atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED); > +} > + > +static void zspage_read_lock(struct zspage *zspage) > +{ > + atomic_t *lock = &zspage->lock; > + int old; > + > + while (1) { > + old = atomic_read(lock); > + if (old == ZS_PAGE_WRLOCKED) { > + cpu_relax(); > + continue; > + } > + > + if 
(atomic_cmpxchg(lock, old, old + 1) == old) > + return; You can use atomic_try_cmpxchg() here: if (atomic_try_cmpxchg(lock, &old, old + 1)) return; > + > + cpu_relax(); > + } > +} > + > +static void zspage_read_unlock(struct zspage *zspage) > +{ > + atomic_dec(&zspage->lock); > +} > + > +static void zspage_write_lock(struct zspage *zspage) > +{ > + atomic_t *lock = &zspage->lock; > + int old; > + > + while (1) { > + old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED); > + if (old == ZS_PAGE_UNLOCKED) > + return; Also, the above code can be rewritten as: while (1) { old = ZS_PAGE_UNLOCKED; if (atomic_try_cmpxchg (lock, &old, ZS_PAGE_WRLOCKED)) return; > + > + cpu_relax(); > + } > +} The above change will result in a slightly better generated asm. Uros.
On (25/01/27 21:23), Uros Bizjak wrote: > > +static void zspage_read_lock(struct zspage *zspage) > > +{ > > + atomic_t *lock = &zspage->lock; > > + int old; > > + > > + while (1) { > > + old = atomic_read(lock); > > + if (old == ZS_PAGE_WRLOCKED) { > > + cpu_relax(); > > + continue; > > + } > > + > > + if (atomic_cmpxchg(lock, old, old + 1) == old) > > + return; > > You can use atomic_try_cmpxchg() here: > > if (atomic_try_cmpxchg(lock, &old, old + 1)) > return; > > > + > > + cpu_relax(); > > + } > > +} > > + > > +static void zspage_read_unlock(struct zspage *zspage) > > +{ > > + atomic_dec(&zspage->lock); > > +} > > + > > +static void zspage_write_lock(struct zspage *zspage) > > +{ > > + atomic_t *lock = &zspage->lock; > > + int old; > > + > > + while (1) { > > + old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED); > > + if (old == ZS_PAGE_UNLOCKED) > > + return; > > Also, the above code can be rewritten as: > > while (1) { > old = ZS_PAGE_UNLOCKED; > if (atomic_try_cmpxchg (lock, &old, ZS_PAGE_WRLOCKED)) > return; > > + > > + cpu_relax(); > > + } > > +} > > The above change will result in a slightly better generated asm. Thanks, I'll take a look for the next version.
© 2016 - 2025 Red Hat, Inc.