From nobody Sat Feb 7 20:13:05 2026
From: Leonardo Bras <leobras@redhat.com>
To: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, Andrew Morton, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>, Leonardo Bras,
	Thomas Gleixner, Marcelo Tosatti
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH v1 1/4] Introducing qpw_lock() and per-cpu queue & flush work
Date: Sat, 22 Jun 2024 00:58:09 -0300
Message-ID: <20240622035815.569665-2-leobras@redhat.com>
X-Mailer: git-send-email 2.45.2
In-Reply-To: <20240622035815.569665-1-leobras@redhat.com>
References: <20240622035815.569665-1-leobras@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

Some places in the kernel implement a parallel programming strategy
consisting of local_lock()s for most of the work, with the few remote
operations scheduled on the target cpu. This keeps cache bouncing low,
since cachelines tend to stay local, and avoids the cost of locks in
non-RT kernels, even though the rare remote operations are expensive
due to scheduling overhead.

On the other hand, for RT workloads this can be a problem: getting an
important workload scheduled out to deal with some unrelated task is
sure to introduce unexpected deadline misses.

It's interesting, though, that local_lock()s in RT kernels become
spinlock()s. We can make use of that to avoid scheduling work on a
remote cpu, by directly updating another cpu's per-cpu structure while
holding its spinlock.

In order to do that, it's necessary to introduce a new set of functions
for acquiring another cpu's per-cpu "local" lock (qpw_{un,}lock*()),
together with the corresponding queue_percpu_work_on() and
flush_percpu_work() helpers to run the remote work.

On non-RT kernels, no behavior change is expected, as each of the
introduced helpers works exactly like the current implementation:

qpw_{un,}lock*()       -> local_{un,}lock*() (ignores the cpu parameter)
queue_percpu_work_on() -> queue_work_on()
flush_percpu_work()    -> flush_work()

For RT kernels, though, qpw_{un,}lock*() will use the extra cpu
parameter to select the correct per-cpu structure to work on, and
acquire that cpu's spinlock. queue_percpu_work_on() will just call the
requested function on the current cpu, operating on the other cpu's
per-cpu object. Since local_lock()s become spinlock()s on PREEMPT_RT,
this is safe. flush_percpu_work() then becomes a no-op, since no work
is actually scheduled on a remote cpu.
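To illustrate the intended usage, here is a minimal sketch (hypothetical
names my_pcp / my_drain_work, not part of this patch):

	struct my_pcp {
		local_lock_t lock;
		/* per-cpu data protected by @lock */
	};
	static DEFINE_PER_CPU(struct my_pcp, my_pcp) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};
	static DEFINE_PER_CPU(struct qpw_struct, my_qpw);

	static void my_drain_work(struct work_struct *w)
	{
		/*
		 * !RT: the local cpu, since the work was queued here.
		 * RT: the target cpu stored in the qpw_struct.
		 */
		int cpu = qpw_get_cpu(w);

		qpw_lock(&my_pcp.lock, cpu);
		/* operate on per_cpu_ptr(&my_pcp, cpu) */
		qpw_unlock(&my_pcp.lock, cpu);
	}

	/* caller side, for each cpu that needs draining: */
	INIT_QPW(&per_cpu(my_qpw, cpu), my_drain_work, cpu);
	queue_percpu_work_on(cpu, system_wq, &per_cpu(my_qpw, cpu));
	/* later: */
	flush_percpu_work(&per_cpu(my_qpw, cpu));

On !RT this behaves exactly like INIT_WORK() + queue_work_on() +
flush_work(); on RT the handler runs inline on the queueing cpu while
holding the target cpu's lock.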
Some minimal code rework is needed in order to make this mechanism work:
the calls to local_{un,}lock*() in functions that can be scheduled on
remote cpus need to be replaced by qpw_{un,}lock*(), so that on RT
kernels they can reference a different cpu. It's also necessary to use
a qpw_struct instead of a work_struct, but it just contains a
work_struct and, on PREEMPT_RT, the target cpu.

This should have almost no impact on non-RT kernels: a few
this_cpu_ptr() uses become per_cpu_ptr(..., smp_processor_id()).

On RT kernels, this should improve performance and reduce latency by
removing scheduling noise.

Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
 include/linux/qpw.h | 88 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 include/linux/qpw.h

diff --git a/include/linux/qpw.h b/include/linux/qpw.h
new file mode 100644
index 000000000000..ea2686a01e5e
--- /dev/null
+++ b/include/linux/qpw.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_QPW_H
+#define _LINUX_QPW_H
+
+#include <linux/local_lock.h>
+#include <linux/workqueue.h>
+
+#ifndef CONFIG_PREEMPT_RT
+
+struct qpw_struct {
+	struct work_struct work;
+};
+
+#define qpw_lock(lock, cpu) \
+	local_lock(lock)
+
+#define qpw_unlock(lock, cpu) \
+	local_unlock(lock)
+
+#define qpw_lock_irqsave(lock, flags, cpu) \
+	local_lock_irqsave(lock, flags)
+
+#define qpw_unlock_irqrestore(lock, flags, cpu) \
+	local_unlock_irqrestore(lock, flags)
+
+#define queue_percpu_work_on(c, wq, qpw) \
+	queue_work_on(c, wq, &(qpw)->work)
+
+#define flush_percpu_work(qpw) \
+	flush_work(&(qpw)->work)
+
+#define qpw_get_cpu(qpw) \
+	smp_processor_id()
+
+#define INIT_QPW(qpw, func, c) \
+	INIT_WORK(&(qpw)->work, (func))
+
+#else /* !CONFIG_PREEMPT_RT */
+
+struct qpw_struct {
+	struct work_struct work;
+	int cpu;
+};
+
+#define qpw_lock(__lock, cpu) \
+	do { \
+		migrate_disable(); \
+		spin_lock(per_cpu_ptr((__lock), cpu)); \
+	} while (0)
+
+#define qpw_unlock(__lock, cpu) \
+	do { \
+		spin_unlock(per_cpu_ptr((__lock), cpu)); \
+		migrate_enable(); \
+	} while (0)
+
+#define qpw_lock_irqsave(lock, flags, cpu) \
+	do { \
+		typecheck(unsigned long, flags); \
+		flags = 0; \
+		qpw_lock(lock, cpu); \
+	} while (0)
+
+#define qpw_unlock_irqrestore(lock, flags, cpu) \
+	qpw_unlock(lock, cpu)
+
+#define queue_percpu_work_on(c, wq, qpw) \
+	do { \
+		struct qpw_struct *__qpw = (qpw); \
+		WARN_ON((c) != __qpw->cpu); \
+		__qpw->work.func(&__qpw->work); \
+	} while (0)
+
+#define flush_percpu_work(qpw) \
+	do {} while (0)
+
+#define qpw_get_cpu(w) \
+	container_of((w), struct qpw_struct, work)->cpu
+
+#define INIT_QPW(qpw, func, c) \
+	do { \
+		struct qpw_struct *__qpw = (qpw); \
+		INIT_WORK(&__qpw->work, (func)); \
+		__qpw->cpu = (c); \
+	} while (0)
+
+#endif /* CONFIG_PREEMPT_RT */
+#endif /* _LINUX_QPW_H */
--
2.45.2

From nobody Sat Feb 7 20:13:05 2026
From: Leonardo Bras <leobras@redhat.com>
To: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, Andrew Morton, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>, Leonardo Bras,
	Thomas Gleixner, Marcelo Tosatti
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH v1 2/4] swap: apply new queue_percpu_work_on() interface
Date: Sat, 22 Jun 2024 00:58:10 -0300
Message-ID: <20240622035815.569665-3-leobras@redhat.com>
X-Mailer: git-send-email 2.45.2
In-Reply-To: <20240622035815.569665-1-leobras@redhat.com>
References: <20240622035815.569665-1-leobras@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

Make use of the new qpw_{un,}lock*() and queue_percpu_work_on()
interface to improve performance & latency on PREEMPT_RT kernels.

For functions that may be scheduled on a different cpu, replace
local_{un,}lock*() with qpw_{un,}lock*(), and schedule_work_on() with
queue_percpu_work_on(). The same goes for flush_work() and
flush_percpu_work().

This change requires allocating qpw_structs instead of work_structs,
and changing the parameters of a few functions to include the cpu
parameter.

This should bring no relevant performance impact on non-RT kernels:
for functions that may be scheduled on a different cpu, the
local_*lock's this_cpu_ptr() becomes per_cpu_ptr(..., smp_processor_id()).

Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
 mm/swap.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index 67786cb77130..c1a61b7cd71a 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -28,21 +28,21 @@
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
-#include <linux/local_lock.h>
+#include <linux/qpw.h>
 #include

 #include "internal.h"

 #define CREATE_TRACE_POINTS
 #include

 /* How many pages do we try to swap or page in/out together? As a power of 2 */
 int page_cluster;
 const int page_cluster_max = 31;

@@ -758,45 +758,45 @@ void lru_add_drain(void)
 	local_unlock(&cpu_fbatches.lock);
 	mlock_drain_local();
 }

 /*
  * It's called from per-cpu workqueue context in SMP case so
  * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on
  * the same cpu. It shouldn't be a problem in !SMP case since
  * the core is only one and the locks will disable preemption.
  */
-static void lru_add_and_bh_lrus_drain(void)
+static void lru_add_and_bh_lrus_drain(int cpu)
 {
-	local_lock(&cpu_fbatches.lock);
-	lru_add_drain_cpu(smp_processor_id());
-	local_unlock(&cpu_fbatches.lock);
+	qpw_lock(&cpu_fbatches.lock, cpu);
+	lru_add_drain_cpu(cpu);
+	qpw_unlock(&cpu_fbatches.lock, cpu);
 	invalidate_bh_lrus_cpu();
 	mlock_drain_local();
 }

 void lru_add_drain_cpu_zone(struct zone *zone)
 {
 	local_lock(&cpu_fbatches.lock);
 	lru_add_drain_cpu(smp_processor_id());
 	drain_local_pages(zone);
 	local_unlock(&cpu_fbatches.lock);
 	mlock_drain_local();
 }

 #ifdef CONFIG_SMP

-static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+static DEFINE_PER_CPU(struct qpw_struct, lru_add_drain_qpw);

-static void lru_add_drain_per_cpu(struct work_struct *dummy)
+static void lru_add_drain_per_cpu(struct work_struct *w)
 {
-	lru_add_and_bh_lrus_drain();
+	lru_add_and_bh_lrus_drain(qpw_get_cpu(w));
 }

 static bool cpu_needs_drain(unsigned int cpu)
 {
 	struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);

 	/* Check these in order of likelihood that they're not zero */
 	return folio_batch_count(&fbatches->lru_add) ||
 		data_race(folio_batch_count(&per_cpu(lru_rotate.fbatch, cpu))) ||
 		folio_batch_count(&fbatches->lru_deactivate_file) ||
@@ -882,31 +882,31 @@ static inline void __lru_add_drain_all(bool force_all_cpus)
 	 *
 	 * If the paired barrier is done at any later step, e.g. after the
 	 * loop, CPU #x will just exit at (C) and miss flushing out all of its
 	 * added pages.
 	 */
 	WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
 	smp_mb();

 	cpumask_clear(&has_work);
 	for_each_online_cpu(cpu) {
-		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
+		struct qpw_struct *qpw = &per_cpu(lru_add_drain_qpw, cpu);

 		if (cpu_needs_drain(cpu)) {
-			INIT_WORK(work, lru_add_drain_per_cpu);
-			queue_work_on(cpu, mm_percpu_wq, work);
+			INIT_QPW(qpw, lru_add_drain_per_cpu, cpu);
+			queue_percpu_work_on(cpu, mm_percpu_wq, qpw);
 			__cpumask_set_cpu(cpu, &has_work);
 		}
 	}

 	for_each_cpu(cpu, &has_work)
-		flush_work(&per_cpu(lru_add_drain_work, cpu));
+		flush_percpu_work(&per_cpu(lru_add_drain_qpw, cpu));

 done:
 	mutex_unlock(&lock);
 }

 void lru_add_drain_all(void)
 {
 	__lru_add_drain_all(false);
 }
 #else
@@ -939,21 +939,21 @@ void lru_cache_disable(void)
 	 *
 	 * Since v5.1 kernel, synchronize_rcu() is guaranteed to wait on
 	 * preempt_disable() regions of code. So any CPU which sees
 	 * lru_disable_count = 0 will have exited the critical
 	 * section when synchronize_rcu() returns.
 	 */
 	synchronize_rcu_expedited();
 #ifdef CONFIG_SMP
 	__lru_add_drain_all(true);
 #else
-	lru_add_and_bh_lrus_drain();
+	lru_add_and_bh_lrus_drain(smp_processor_id());
 #endif
 }

 /**
  * folios_put_refs - Reduce the reference count on a batch of folios.
  * @folios: The folios.
  * @refs: The number of refs to subtract from each folio.
  *
  * Like folio_put(), but for a batch of folios. This is more efficient
  * than writing the loop yourself as it will optimise the locks which need

--
2.45.2

From nobody Sat Feb 7 20:13:05 2026
From: Leonardo Bras <leobras@redhat.com>
To: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, Andrew Morton, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>, Leonardo Bras,
	Thomas Gleixner, Marcelo Tosatti
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH v1 3/4] memcontrol: apply new queue_percpu_work_on() interface
Date: Sat, 22 Jun 2024 00:58:11 -0300
Message-ID: <20240622035815.569665-4-leobras@redhat.com>
X-Mailer: git-send-email 2.45.2
In-Reply-To: <20240622035815.569665-1-leobras@redhat.com>
References: <20240622035815.569665-1-leobras@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

Make use of the new qpw_{un,}lock*() and queue_percpu_work_on()
interface to improve performance & latency on PREEMPT_RT kernels.

For functions that may be scheduled on a different cpu, replace
local_{un,}lock*() with qpw_{un,}lock*(), and schedule_work_on() with
queue_percpu_work_on(). This change requires allocating qpw_structs
instead of work_structs.

This should bring no relevant performance impact on non-RT kernels:
for functions that may be scheduled on a different cpu, the
local_*lock's this_cpu_ptr() becomes per_cpu_ptr(..., smp_processor_id()).
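In short, the conversion in drain_local_stock() follows this shape
(an abridged sketch of the change in the diff below, not the complete
function):

	static void drain_local_stock(struct work_struct *w)
	{
		unsigned long flags;
		int cpu = qpw_get_cpu(w);

		qpw_lock_irqsave(&memcg_stock.stock_lock, flags, cpu);
		/* drain per_cpu_ptr(&memcg_stock, cpu) instead of this_cpu_ptr() */
		qpw_unlock_irqrestore(&memcg_stock.stock_lock, flags, cpu);
	}

On !RT this is still the queued cpu's local lock; on RT it takes the
target cpu's spinlock directly, so no remote work needs scheduling.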
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
 mm/memcontrol.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71fe2a95b8bd..18a987f8c998 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,20 +59,21 @@
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include <linux/qpw.h>
 #include "internal.h"
 #include
 #include
 #include "slab.h"
 #include "swap.h"

 #include

 #include

@@ -2415,21 +2416,21 @@ struct memcg_stock_pcp {
 	unsigned int nr_pages;

 #ifdef CONFIG_MEMCG_KMEM
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
 	unsigned int nr_bytes;
 	int nr_slab_reclaimable_b;
 	int nr_slab_unreclaimable_b;
 #endif

-	struct work_struct work;
+	struct qpw_struct qpw;
 	unsigned long flags;
 #define FLUSHING_CACHED_CHARGE	0
 };
 static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
 	.stock_lock = INIT_LOCAL_LOCK(stock_lock),
 };
 static DEFINE_MUTEX(percpu_charge_mutex);

 #ifdef CONFIG_MEMCG_KMEM
 static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock);
@@ -2503,39 +2504,40 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 		if (do_memsw_account())
 			page_counter_uncharge(&old->memsw, stock_pages);

 		WRITE_ONCE(stock->nr_pages, 0);
 	}

 	css_put(&old->css);
 	WRITE_ONCE(stock->cached, NULL);
 }

-static void drain_local_stock(struct work_struct *dummy)
+static void drain_local_stock(struct work_struct *w)
 {
 	struct memcg_stock_pcp *stock;
 	struct obj_cgroup *old = NULL;
 	unsigned long flags;
+	int cpu = qpw_get_cpu(w);

 	/*
 	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
 	 * drain_stock races is that we always operate on local CPU stock
 	 * here with IRQ disabled
 	 */
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	qpw_lock_irqsave(&memcg_stock.stock_lock, flags, cpu);

-	stock = this_cpu_ptr(&memcg_stock);
+	stock = per_cpu_ptr(&memcg_stock, cpu);
 	old = drain_obj_stock(stock);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);

-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	qpw_unlock_irqrestore(&memcg_stock.stock_lock, flags, cpu);
 	obj_cgroup_put(old);
 }

 /*
  * Cache charges(val) to local per_cpu area.
  * This will be consumed by consume_stock() function, later.
  */
 static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct memcg_stock_pcp *stock;
@@ -2592,23 +2594,23 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 		if (memcg && READ_ONCE(stock->nr_pages) &&
 		    mem_cgroup_is_descendant(memcg, root_memcg))
 			flush = true;
 		else if (obj_stock_flush_required(stock, root_memcg))
 			flush = true;
 		rcu_read_unlock();

 		if (flush &&
 		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
 			if (cpu == curcpu)
-				drain_local_stock(&stock->work);
+				drain_local_stock(&stock->qpw.work);
 			else if (!cpu_is_isolated(cpu))
-				schedule_work_on(cpu, &stock->work);
+				queue_percpu_work_on(cpu, system_wq, &stock->qpw);
 		}
 	}
 	migrate_enable();
 	mutex_unlock(&percpu_charge_mutex);
 }

 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;

@@ -7956,22 +7958,22 @@ static int __init mem_cgroup_init(void)
 	 * used for per-memcg-per-cpu caching of per-node statistics. In order
 	 * to work fine, we should make sure that the overfill threshold can't
 	 * exceed S32_MAX / PAGE_SIZE.
 	 */
 	BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE);

 	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
 				  memcg_hotplug_cpu_dead);

 	for_each_possible_cpu(cpu)
-		INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
-			  drain_local_stock);
+		INIT_QPW(&per_cpu_ptr(&memcg_stock, cpu)->qpw,
+			 drain_local_stock, cpu);

 	for_each_node(node) {
 		struct mem_cgroup_tree_per_node *rtpn;

 		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, node);

 		rtpn->rb_root = RB_ROOT;
 		rtpn->rb_rightmost = NULL;
 		spin_lock_init(&rtpn->lock);
 		soft_limit_tree.rb_tree_per_node[node] = rtpn;

--
2.45.2

From nobody Sat Feb 7 20:13:05 2026
From: Leonardo Bras <leobras@redhat.com>
To: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, Andrew Morton, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>, Leonardo Bras,
	Thomas Gleixner, Marcelo Tosatti
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH v1 4/4] slub: apply new queue_percpu_work_on() interface
Date: Sat, 22 Jun 2024 00:58:12 -0300
Message-ID: <20240622035815.569665-5-leobras@redhat.com>
X-Mailer: git-send-email 2.45.2
In-Reply-To: <20240622035815.569665-1-leobras@redhat.com>
References: <20240622035815.569665-1-leobras@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

Make use of the new qpw_{un,}lock*() and queue_percpu_work_on()
interface to improve performance & latency on PREEMPT_RT kernels.

For functions that may be scheduled on a different cpu, replace
local_{un,}lock*() with qpw_{un,}lock*(), and schedule_work_on() with
queue_percpu_work_on(). The same goes for flush_work() and
flush_percpu_work().

This change requires allocating qpw_structs instead of work_structs,
and changing the parameters of a few functions to include the cpu
parameter.

This should bring no relevant performance impact on non-RT kernels:
for functions that may be scheduled on a different cpu, the
local_*lock's this_cpu_ptr() becomes per_cpu_ptr(..., smp_processor_id()).

Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
 mm/slub.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 1373ac365a46..5cd91541906e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -35,20 +35,21 @@
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include <linux/qpw.h>

 #include
 #include

 #include "internal.h"

 /*
  * Lock order:
  *   1. slab_mutex (Global Mutex)
  *   2. node->list_lock (Spinlock)
@@ -3073,36 +3074,37 @@ static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
 }

 #else	/* CONFIG_SLUB_CPU_PARTIAL */

 static inline void put_partials(struct kmem_cache *s) { }
 static inline void put_partials_cpu(struct kmem_cache *s,
 				    struct kmem_cache_cpu *c) { }

 #endif	/* CONFIG_SLUB_CPU_PARTIAL */

-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
+			      int cpu)
 {
 	unsigned long flags;
 	struct slab *slab;
 	void *freelist;

-	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	qpw_lock_irqsave(&s->cpu_slab->lock, flags, cpu);

 	slab = c->slab;
 	freelist = c->freelist;

 	c->slab = NULL;
 	c->freelist = NULL;
 	c->tid = next_tid(c->tid);

-	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+	qpw_unlock_irqrestore(&s->cpu_slab->lock, flags, cpu);

 	if (slab) {
 		deactivate_slab(s, slab, freelist);
 		stat(s, CPUSLAB_FLUSH);
 	}
 }

 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
 	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
@@ -3115,82 +3117,84 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)

 	if (slab) {
 		deactivate_slab(s, slab, freelist);
 		stat(s, CPUSLAB_FLUSH);
 	}

 	put_partials_cpu(s, c);
 }

 struct slub_flush_work {
-	struct work_struct work;
+	struct qpw_struct qpw;
 	struct kmem_cache *s;
 	bool skip;
 };

+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
+
 /*
  * Flush cpu slab.
  *
  * Called from CPU work handler with migration disabled.
  */
 static void flush_cpu_slab(struct work_struct *w)
 {
 	struct kmem_cache *s;
 	struct kmem_cache_cpu *c;
 	struct slub_flush_work *sfw;
+	int cpu = qpw_get_cpu(w);

-	sfw = container_of(w, struct slub_flush_work, work);
+	sfw = &per_cpu(slub_flush, cpu);

 	s = sfw->s;
-	c = this_cpu_ptr(s->cpu_slab);
+	c = per_cpu_ptr(s->cpu_slab, cpu);

 	if (c->slab)
-		flush_slab(s, c);
+		flush_slab(s, c, cpu);

 	put_partials(s);
 }

 static bool has_cpu_slab(int cpu, struct kmem_cache *s)
 {
 	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

 	return c->slab || slub_percpu_partial(c);
 }

 static DEFINE_MUTEX(flush_lock);
-static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);

 static void flush_all_cpus_locked(struct kmem_cache *s)
 {
 	struct slub_flush_work *sfw;
 	unsigned int cpu;

 	lockdep_assert_cpus_held();
 	mutex_lock(&flush_lock);

 	for_each_online_cpu(cpu) {
 		sfw = &per_cpu(slub_flush, cpu);
 		if (!has_cpu_slab(cpu, s)) {
 			sfw->skip = true;
 			continue;
 		}
-		INIT_WORK(&sfw->work, flush_cpu_slab);
+		INIT_QPW(&sfw->qpw, flush_cpu_slab, cpu);
 		sfw->skip = false;
 		sfw->s = s;
-		queue_work_on(cpu, flushwq, &sfw->work);
+		queue_percpu_work_on(cpu, flushwq, &sfw->qpw);
 	}

 	for_each_online_cpu(cpu) {
 		sfw = &per_cpu(slub_flush, cpu);
 		if (sfw->skip)
 			continue;
-		flush_work(&sfw->work);
+		flush_percpu_work(&sfw->qpw);
 	}

 	mutex_unlock(&flush_lock);
 }

 static void flush_all(struct kmem_cache *s)
 {
 	cpus_read_lock();
 	flush_all_cpus_locked(s);
 	cpus_read_unlock();

--
2.45.2