From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pg1-f182.google.com (mail-pg1-f182.google.com [209.85.215.182]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7E0704596D for ; Wed, 27 Dec 2023 14:49:46 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="gc3dgyBP" Received: by mail-pg1-f182.google.com with SMTP id 41be03b00d2f7-5c229dabbb6so1345090a12.0 for ; Wed, 27 Dec 2023 06:49:46 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688585; x=1704293385; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=bZIdcLBe5AtoEgMrZW8B5XtJiAwQZuHkmbazdIfOtJ0=; b=gc3dgyBPaPukg07mXkuyXuXi/6QK3hl4RBw/D6GMUHyYjNGr7vrd4/sM0yCn9aLo9Q Qp9aT4IkXD88SVNGpxQuG4IsRqK8Pe4yy5j817CI6GvwwxJ5vAFviSakEzTtjIiOPEgD arXNIXNqY1FjtLvT2tj5Uh96TZEBXE2xcljKCqSXCunctj8iz2x2t2RnHyKCjhRqcHkp 0DtvjNmfCJDsureKMEeIvGvIkK1Fpxsl1EphQt8G5peKLu0JU0e9B0bTY+eJJP4HutSu dJsFcR3XpzfHEvoxAOlG/vJcCsZxe2ZwEXlat/pZXtAg/9rvxjjkPY+hKmijTAzcw//w X6XA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688585; x=1704293385; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=bZIdcLBe5AtoEgMrZW8B5XtJiAwQZuHkmbazdIfOtJ0=; b=hR5bOFandLeNCGiSuaDdHCVkezYOHRyO7Ljplw/1aPyAU5emm451J429SJdtP14wdi 9jFMOKd2Q700lMymTBOL28Pu/XFivTh2cSsh1jTErVuVWsa+5St/gQii4scMdnx+KGlO mUgooqGxzd+hRuql8m4YO39MVdYA6bmwlOD5NU5xxGOu12Vda3EZmU7qZ4BIDo2hey0t 
0Pejjt73tlTuGb2S7mDnPzhceOkl6/lil+3tgHXqGFG8MsgY08iioAE7+HnDW48SX2xI XlFVW4yv2YrqjaQueW5BQdVLWUSJ0bx1aSF+Cg1AcxIlEuc/T0dFSKiRU76Z/+GnBJEg YZ3Q== X-Gm-Message-State: AOJu0YwCqRJ/a+P9hxBiZk6ZADubTPqOJ8y9qTJlSyF3uU9Yjto6NyFV 7xQWw5OLcaTMrqq7VC5FDBE1l+jNp5g= X-Google-Smtp-Source: AGHT+IG+CWEYs35r8ECOO6GjjQlw+1c1932pOSzoGAWMTMR/juLSTmGLl/LzjZEDsUxTDx2ZdxjSpw== X-Received: by 2002:a17:90a:7e96:b0:28b:c10e:2f1e with SMTP id j22-20020a17090a7e9600b0028bc10e2f1emr2562890pjl.23.1703688585192; Wed, 27 Dec 2023 06:49:45 -0800 (PST) Received: from localhost ([198.11.178.15]) by smtp.gmail.com with ESMTPSA id h12-20020a17090aea8c00b0028c89298d36sm2178718pjz.27.2023.12.27.06.49.44 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 06:49:44 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan Subject: [PATCH 1/7] workqueue: Reuse the default PWQ as much as possible Date: Wed, 27 Dec 2023 22:51:37 +0800 Message-Id: <20231227145143.2399-2-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: <20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Lai Jiangshan If the PWQ to be allocated has the same __pod_cpumask as the default one, just reuse the default one. No functional changes intended. 
Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2989b57e154a..e734625fc8ce 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4270,7 +4270,7 @@ static void wq_calc_pod_cpumask(struct workqueue_attr= s *attrs, int cpu, if (cpu_going_down >=3D 0) cpumask_clear_cpu(cpu_going_down, attrs->__pod_cpumask); =20 - if (cpumask_empty(attrs->__pod_cpumask)) { + if (attrs->ordered || cpumask_empty(attrs->__pod_cpumask)) { cpumask_copy(attrs->__pod_cpumask, attrs->cpumask); return; } @@ -4360,15 +4360,15 @@ apply_wqattrs_prepare(struct workqueue_struct *wq, goto out_free; =20 for_each_possible_cpu(cpu) { - if (new_attrs->ordered) { + wq_calc_pod_cpumask(new_attrs, cpu, -1); + if (cpumask_equal(new_attrs->cpumask, new_attrs->__pod_cpumask)) { ctx->dfl_pwq->refcnt++; ctx->pwq_tbl[cpu] =3D ctx->dfl_pwq; - } else { - wq_calc_pod_cpumask(new_attrs, cpu, -1); - ctx->pwq_tbl[cpu] =3D alloc_unbound_pwq(wq, new_attrs); - if (!ctx->pwq_tbl[cpu]) - goto out_free; + continue; } + ctx->pwq_tbl[cpu] =3D alloc_unbound_pwq(wq, new_attrs); + if (!ctx->pwq_tbl[cpu]) + goto out_free; } =20 /* save the user configured attrs and sanitize it. 
*/ @@ -4530,6 +4530,8 @@ static void wq_update_pod(struct workqueue_struct *wq= , int cpu, lockdep_is_held(&wq_pool_mutex)); if (wqattrs_equal(target_attrs, pwq->pool->attrs)) return; + if (cpumask_equal(target_attrs->cpumask, target_attrs->__pod_cpumask)) + goto use_dfl_pwq; =20 /* create a new pwq */ pwq =3D alloc_unbound_pwq(wq, target_attrs); --=20 2.19.1.6.gb485710b From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pj1-f51.google.com (mail-pj1-f51.google.com [209.85.216.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BB8B945BF1 for ; Wed, 27 Dec 2023 14:49:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="QmO4K4Ao" Received: by mail-pj1-f51.google.com with SMTP id 98e67ed59e1d1-28c0df4b42eso2407449a91.1 for ; Wed, 27 Dec 2023 06:49:50 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688590; x=1704293390; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=LKo1wBDCqtIzP+GSKPuy7WewEJssgRVowRe//gyJUC0=; b=QmO4K4AoZMSeXOxYB1i8sOoWnDzdIQIICbyHdtJfuec/Zqvm2DvOSZDQ55pAkT5k/D 7PFtr9TV7sN9dqRFxzq2Oj/sltQ5uRbe4qiQpGTdxrQeVPssJw2KG4TMCXV5yepKRpr6 K+aQ7UOf7GzK+HOAHhpX7THuWHKXyQlh00TjqqFKrjYjtwQuAmRqkR1KJwvpalBDxNeG c5Wi4RIUP6wX6150fcZeY7q8oLI4kC0i8m/4yhjcbZne9MUZJgPCe2RbNQRRDO98DkAh DAnXwoimUF3bfpdPb3HAZvbHnMk0iqmIVZa6x4ozPY9j/X6mty21HJuzfzHckWmytiBK EuHQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688590; x=1704293390; 
h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=LKo1wBDCqtIzP+GSKPuy7WewEJssgRVowRe//gyJUC0=; b=nyqHqUwCFf+2dFEfmBNe+ElODDVJNgBFFVVt9qJyKEjjH0EPAEDbvlwFZYXAdCvkup w3o1Kf88xwNgAYc6tb7hlacgf33uOzGq/IfTt6hqYhovk7CuZqcHZyS4CQVRtI7ndA+z eSh2vr5L4Mtsus0/V7mLk/OHW1ZS4OF6N0t05dZrduzTT3pTsL6LRGIyjl53XfkPRlgG SFZc4c579mm4yD7ndATecxXurSd1QNc3dp+Yiipo/AsK0AX7eCxrChPggHOpFl0lfelG j0O0l9Ty1AThKFt6iIWkavJawef4EaW54NMQYskc8jcMiPyGtdexQnsg8zzDdgFENoK6 NGgA== X-Gm-Message-State: AOJu0YzWc9r3vAZN0czlygkU8aisde/seKm/VG8IL2eaRJxwf6kWU14m MgTBNP59gIH1BBT+9kVEJFQ2eHkRP7I= X-Google-Smtp-Source: AGHT+IHeXSarFXe61pBQRVue6pVDu42IhdPayF6IIWlrCZTUR9pCiNTUAeUbWMdYixrsUEA1B/uLsQ== X-Received: by 2002:a17:90a:b006:b0:28c:4527:ef9a with SMTP id x6-20020a17090ab00600b0028c4527ef9amr6353789pjq.40.1703688589589; Wed, 27 Dec 2023 06:49:49 -0800 (PST) Received: from localhost ([47.88.5.130]) by smtp.gmail.com with ESMTPSA id li16-20020a17090b48d000b0028bf0b91f6bsm12423701pjb.21.2023.12.27.06.49.48 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 06:49:49 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan Subject: [PATCH 2/7] workqueue: Share the same PWQ for the CPUs of a pod Date: Wed, 27 Dec 2023 22:51:38 +0800 Message-Id: <20231227145143.2399-3-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: <20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Lai Jiangshan PWQs with the same attrs shared the same pool. 
So just share the same PWQ for all the CPUs of a pod instead of duplicating them. Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 78 +++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e734625fc8ce..1f52685498f1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4360,15 +4360,29 @@ apply_wqattrs_prepare(struct workqueue_struct *wq, goto out_free; =20 for_each_possible_cpu(cpu) { + struct pool_workqueue *pwq; + int tcpu; + + if (ctx->pwq_tbl[cpu]) + continue; wq_calc_pod_cpumask(new_attrs, cpu, -1); if (cpumask_equal(new_attrs->cpumask, new_attrs->__pod_cpumask)) { ctx->dfl_pwq->refcnt++; ctx->pwq_tbl[cpu] =3D ctx->dfl_pwq; continue; } - ctx->pwq_tbl[cpu] =3D alloc_unbound_pwq(wq, new_attrs); - if (!ctx->pwq_tbl[cpu]) + pwq =3D alloc_unbound_pwq(wq, new_attrs); + if (!pwq) goto out_free; + /* + * Reinitialize pwq->refcnt and prepare the new pwd for + * all the CPU of the pod. + */ + pwq->refcnt =3D 0; + for_each_cpu(tcpu, new_attrs->__pod_cpumask) { + pwq->refcnt++; + ctx->pwq_tbl[tcpu] =3D pwq; + } } =20 /* save the user configured attrs and sanitize it. */ @@ -4483,15 +4497,13 @@ int apply_workqueue_attrs(struct workqueue_struct *= wq, /** * wq_update_pod - update pod affinity of a wq for CPU hot[un]plug * @wq: the target workqueue - * @cpu: the CPU to update pool association for - * @hotplug_cpu: the CPU coming up or going down + * @cpu: the CPU coming up or going down * @online: whether @cpu is coming up or going down * * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update pod affinity of * @wq accordingly. * - * * If pod affinity can't be adjusted due to memory allocation failure, it = falls * back to @wq->dfl_pwq which may not be optimal but is always correct. * @@ -4502,11 +4514,11 @@ int apply_workqueue_attrs(struct workqueue_struct *= wq, * CPU_DOWN. 
If a workqueue user wants strict affinity, it's the user's * responsibility to flush the work item from CPU_DOWN_PREPARE. */ -static void wq_update_pod(struct workqueue_struct *wq, int cpu, - int hotplug_cpu, bool online) +static void wq_update_pod(struct workqueue_struct *wq, int cpu, bool onlin= e) { - int off_cpu =3D online ? -1 : hotplug_cpu; - struct pool_workqueue *old_pwq =3D NULL, *pwq; + int off_cpu =3D online ? -1 : cpu; + int tcpu; + struct pool_workqueue *pwq; struct workqueue_attrs *target_attrs; =20 lockdep_assert_held(&wq_pool_mutex); @@ -4541,20 +4553,24 @@ static void wq_update_pod(struct workqueue_struct *= wq, int cpu, goto use_dfl_pwq; } =20 - /* Install the new pwq. */ + /* Install the new pwq for all the cpus of the pod */ mutex_lock(&wq->mutex); - old_pwq =3D install_unbound_pwq(wq, cpu, pwq); - goto out_unlock; + /* reinitialize pwq->refcnt before installing */ + pwq->refcnt =3D 0; + for_each_cpu(tcpu, target_attrs->__pod_cpumask) + pwq->refcnt++; + for_each_cpu(tcpu, target_attrs->__pod_cpumask) + put_pwq_unlocked(install_unbound_pwq(wq, tcpu, pwq)); + mutex_unlock(&wq->mutex); + return; =20 use_dfl_pwq: mutex_lock(&wq->mutex); raw_spin_lock_irq(&wq->dfl_pwq->pool->lock); get_pwq(wq->dfl_pwq); raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock); - old_pwq =3D install_unbound_pwq(wq, cpu, wq->dfl_pwq); -out_unlock: + put_pwq_unlocked(install_unbound_pwq(wq, cpu, wq->dfl_pwq)); mutex_unlock(&wq->mutex); - put_pwq_unlocked(old_pwq); } =20 static int alloc_and_link_pwqs(struct workqueue_struct *wq) @@ -5563,15 +5579,8 @@ int workqueue_online_cpu(unsigned int cpu) =20 /* update pod affinity of unbound workqueues */ list_for_each_entry(wq, &workqueues, list) { - struct workqueue_attrs *attrs =3D wq->unbound_attrs; - - if (attrs) { - const struct wq_pod_type *pt =3D wqattrs_pod_type(attrs); - int tcpu; - - for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) - wq_update_pod(wq, tcpu, cpu, true); - } + if (wq->unbound_attrs) + wq_update_pod(wq, cpu, true); 
} =20 mutex_unlock(&wq_pool_mutex); @@ -5591,15 +5600,8 @@ int workqueue_offline_cpu(unsigned int cpu) /* update pod affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); list_for_each_entry(wq, &workqueues, list) { - struct workqueue_attrs *attrs =3D wq->unbound_attrs; - - if (attrs) { - const struct wq_pod_type *pt =3D wqattrs_pod_type(attrs); - int tcpu; - - for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) - wq_update_pod(wq, tcpu, cpu, false); - } + if (wq->unbound_attrs) + wq_update_pod(wq, cpu, false); } mutex_unlock(&wq_pool_mutex); =20 @@ -5891,9 +5893,8 @@ static int wq_affn_dfl_set(const char *val, const str= uct kernel_param *kp) wq_affn_dfl =3D affn; =20 list_for_each_entry(wq, &workqueues, list) { - for_each_online_cpu(cpu) { - wq_update_pod(wq, cpu, cpu, true); - } + for_each_online_cpu(cpu) + wq_update_pod(wq, cpu, true); } =20 mutex_unlock(&wq_pool_mutex); @@ -6803,9 +6804,8 @@ void __init workqueue_init_topology(void) * combinations to apply per-pod sharing. */ list_for_each_entry(wq, &workqueues, list) { - for_each_online_cpu(cpu) { - wq_update_pod(wq, cpu, cpu, true); - } + for_each_online_cpu(cpu) + wq_update_pod(wq, cpu, true); } =20 mutex_unlock(&wq_pool_mutex); --=20 2.19.1.6.gb485710b From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pl1-f175.google.com (mail-pl1-f175.google.com [209.85.214.175]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C91DB4643A for ; Wed, 27 Dec 2023 14:49:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="WF771chD" Received: by mail-pl1-f175.google.com with SMTP id d9443c01a7336-1d427518d52so23302385ad.0 for ; Wed, 27 Dec 
2023 06:49:54 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688594; x=1704293394; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=OEkOlw6eKaiG2LU3fXepVKKsRgGZTje8n42JsBJJYDI=; b=WF771chDp+6AOt7o9/GeMsCo77s+GKRaYP6fpNgXyn/03IgZzPaYYN9lsxdpDsxoZE yTT7YaYZbGfHbyEwQTxdH8DIFQHwEXMR5AH+CPfTpxH/Dri/fUaBYZ6kbOziGgVkhWZP tGYe1rIdhdpLJRdHDXGjJpigK1GttrVFWcdoBy9aNnRFMjoUTA64ff54AiAkQeHVl/57 HRPcy1Xg2fSGBGOpODQ3trJ7ZH1IwPOlZ8mUuc+v5MmSJmvYYMcqLu904VMdSNBOsh5U BymMMvZPPTNGC9tQkWB6ZwWk8T/ErPf5gH6SAz9FWY9C3eMp2Xbv2lnVqLqKcH8coIrF UBkw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688594; x=1704293394; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=OEkOlw6eKaiG2LU3fXepVKKsRgGZTje8n42JsBJJYDI=; b=H54VYtQ+Utq7RaSPFVae2pIRW4r9qOIDqsBZC9U7ZGbDuuPuumxe8AM5y0g6cdVVDm 6lU80f3ocxU93J/DVqcS6gW4zCYcZLCKIDz1CUsK7/xbtaxIB7nkLsOWfWjy/ae8y5Ar L4S60xn7MLiszrpcn7exZJP28Sx2P1xfXoOfIgVLGHIvG22MRM7a1vw9vsKQH6lKTHXQ MAC7oeRc19y+Z2qxwcz9GNdoGHbOppuhuL+x71HLesXyZDteafrY7qA2LkFqcRH8oqjP W/jCaPVbCxDqPBHOHCJRuRxzE0iGztH/ootfD4ZbU5ml+SyJd8vL5t3mH2L5N3d/tqHe 5+LA== X-Gm-Message-State: AOJu0Yyw9wute9xtu5AOAwfhOOtDjJFWEyuHl+3WBY/nOJP2qiZlNoq5 nsMFghOfnqHG1HqQHMJcoxBBrN6N+5g= X-Google-Smtp-Source: AGHT+IHMdNlH0PkgFshshrYxah0QlCi/NG+epxLHJDp5aHBWD29Rbgf5ZWxnqwfgsrJ9xAlWetXsSg== X-Received: by 2002:a17:902:6846:b0:1d4:81bc:a2c2 with SMTP id f6-20020a170902684600b001d481bca2c2mr1093014pln.2.1703688593655; Wed, 27 Dec 2023 06:49:53 -0800 (PST) Received: from localhost ([47.88.5.130]) by smtp.gmail.com with ESMTPSA id ix9-20020a170902f80900b001cff026df52sm12148249plb.221.2023.12.27.06.49.52 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 
06:49:53 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan Subject: [PATCH 3/7] workqueue: Add pwq_calculate_max_active() Date: Wed, 27 Dec 2023 22:51:39 +0800 Message-Id: <20231227145143.2399-4-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: <20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Lai Jiangshan Abstract the code of calculating max_active from pwq_adjust_max_active() into pwq_calculate_max_active() to make the logic clearer. Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1f52685498f1..3347ba3a734f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4136,6 +4136,25 @@ static void pwq_release_workfn(struct kthread_work *= work) } } =20 +/** + * pwq_calculate_max_active - Determine max_active to use + * @pwq: pool_workqueue of interest + * + * Determine the max_active @pwq should use. + */ +static int pwq_calculate_max_active(struct pool_workqueue *pwq) +{ + /* + * During [un]freezing, the caller is responsible for ensuring + * that pwq_adjust_max_active() is called at least once after + * @workqueue_freezing is updated and visible. 
+ */ + if ((pwq->wq->flags & WQ_FREEZABLE) && workqueue_freezing) + return 0; + + return pwq->wq->saved_max_active; +} + /** * pwq_adjust_max_active - update a pwq's max_active to the current setting * @pwq: target pool_workqueue @@ -4147,35 +4166,26 @@ static void pwq_release_workfn(struct kthread_work = *work) static void pwq_adjust_max_active(struct pool_workqueue *pwq) { struct workqueue_struct *wq =3D pwq->wq; - bool freezable =3D wq->flags & WQ_FREEZABLE; + int max_active =3D pwq_calculate_max_active(pwq); unsigned long flags; =20 /* for @wq->saved_max_active */ lockdep_assert_held(&wq->mutex); =20 - /* fast exit for non-freezable wqs */ - if (!freezable && pwq->max_active =3D=3D wq->saved_max_active) + /* fast exit if unchanged */ + if (pwq->max_active =3D=3D max_active) return; =20 /* this function can be called during early boot w/ irq disabled */ raw_spin_lock_irqsave(&pwq->pool->lock, flags); =20 - /* - * During [un]freezing, the caller is responsible for ensuring that - * this function is called at least once after @workqueue_freezing - * is updated and visible. 
- */ - if (!freezable || !workqueue_freezing) { - pwq->max_active =3D wq->saved_max_active; + pwq->max_active =3D max_active; =20 - while (!list_empty(&pwq->inactive_works) && - pwq->nr_active < pwq->max_active) - pwq_activate_first_inactive(pwq); + while (!list_empty(&pwq->inactive_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_inactive(pwq); =20 - kick_pool(pwq->pool); - } else { - pwq->max_active =3D 0; - } + kick_pool(pwq->pool); =20 raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); } --=20 2.19.1.6.gb485710b From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pl1-f182.google.com (mail-pl1-f182.google.com [209.85.214.182]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BF70A4645C for ; Wed, 27 Dec 2023 14:49:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="NN/5vIBk" Received: by mail-pl1-f182.google.com with SMTP id d9443c01a7336-1d3ea5cc137so41536835ad.0 for ; Wed, 27 Dec 2023 06:49:58 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688598; x=1704293398; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=Ykeys9/b81hQucL41oKnYrH4NAgYOfGtA2bT2pIxo5Q=; b=NN/5vIBkuMEgYFRUg56ViHsz6i0CXAPj1jkWMWTSuL+MVuBz6am6ZAV3e7oJvLaErg i2ewoS4ZxSnC+Y0Wu3u1SSLWJVOokwbXmpWwjEjaP5Jo+Ktx4X9NYOcINEf4XRF69Vtk W1Z2c0acQF3t0I+/oF39onoQCFIxp3DSCuyVk/pppKBQi/v3pyxMD5FNmmhMPe6U+2wY xMAjioZHWbXo7QYk8y+ru/xjshGkCFLhdrAFlrtMxUDMpyBdo/cV6bz5n3PW5Bwng86A 4ZxoCAsNKf+Wrm18b/cELUJCvw/KsPhEGbe0iIIOdjmNOICuQ0j+o9K/tmWNhicocZpW 
23kw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688598; x=1704293398; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=Ykeys9/b81hQucL41oKnYrH4NAgYOfGtA2bT2pIxo5Q=; b=LjBA3gYQfVdhc+IZjMsDlNIPLSW6mmAGAEiXiTPUgGrmOeDmW4yUY3062nyhV6bgYk d2kvjcggSewtlYXFIPrbNWDSRGPwBDXQ/FjwuKXXFag8E5pdFqNqsKQlGj4c+QX5Q3vB tiRVUcY0PfqYTBopNMp50Sk4JEd7B3/fDMOJUf+pKNf791ybk8ZDSKAyCFkjBR+7glf+ ZZEjHOJMl+/y8VjTo6JZgSWQ79subfMDDsHJ197UoN8y+Rec/OB21NgI3/vOQTjAclDW KW0b1Cc5tY2ag3gRvvw3113AWR6+TIpTvn9LiBO/p6JnQ+gIIHdj5iNTN+D1nNuIjXrR Un1Q== X-Gm-Message-State: AOJu0YxRxQk0w9i9aDSBVg5W+st+yLBACHjavbBEXyjAgitgeYu/Y70c 0MD3u1oBuYHlYZbaxZ++V8ldL++DJqQ= X-Google-Smtp-Source: AGHT+IHcTxjTwPjjGkbJJK3nbpQrTZXlHguLc+5SBzxWmI2Zv2rylsUjKPCPppedRvBEwAeyFRO5yA== X-Received: by 2002:a17:902:ec92:b0:1d0:6ffe:9f5 with SMTP id x18-20020a170902ec9200b001d06ffe09f5mr8166768plg.83.1703688597875; Wed, 27 Dec 2023 06:49:57 -0800 (PST) Received: from localhost ([47.89.225.180]) by smtp.gmail.com with ESMTPSA id x22-20020a170902821600b001cfc1b931a9sm11995380pln.249.2023.12.27.06.49.57 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 06:49:57 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan Subject: [PATCH 4/7] workqueue: Wrap common code into wq_adjust_pwqs_max_active() Date: Wed, 27 Dec 2023 22:51:40 +0800 Message-Id: <20231227145143.2399-5-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: <20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" 
From: Lai Jiangshan There are 3 places using the same code, so wrap them into a common helper. Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3347ba3a734f..e0101b2b5fa3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4190,6 +4190,16 @@ static void pwq_adjust_max_active(struct pool_workqu= eue *pwq) raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); } =20 +static void wq_adjust_pwqs_max_active(struct workqueue_struct *wq) +{ + struct pool_workqueue *pwq; + + mutex_lock(&wq->mutex); + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); + mutex_unlock(&wq->mutex); +} + /* initialize newly allocated @pwq which is associated with @wq and @pool = */ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *= wq, struct worker_pool *pool) @@ -4700,7 +4710,6 @@ struct workqueue_struct *alloc_workqueue(const char *= fmt, { va_list args; struct workqueue_struct *wq; - struct pool_workqueue *pwq; =20 /* * Unbound && max_active =3D=3D 1 used to imply ordered, which is no long= er @@ -4761,14 +4770,8 @@ struct workqueue_struct *alloc_workqueue(const char = *fmt, * list. 
*/ mutex_lock(&wq_pool_mutex); - - mutex_lock(&wq->mutex); - for_each_pwq(pwq, wq) - pwq_adjust_max_active(pwq); - mutex_unlock(&wq->mutex); - + wq_adjust_pwqs_max_active(wq); list_add_tail_rcu(&wq->list, &workqueues); - mutex_unlock(&wq_pool_mutex); =20 return wq; @@ -5698,19 +5701,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe_key); void freeze_workqueues_begin(void) { struct workqueue_struct *wq; - struct pool_workqueue *pwq; =20 mutex_lock(&wq_pool_mutex); =20 WARN_ON_ONCE(workqueue_freezing); workqueue_freezing =3D true; =20 - list_for_each_entry(wq, &workqueues, list) { - mutex_lock(&wq->mutex); - for_each_pwq(pwq, wq) - pwq_adjust_max_active(pwq); - mutex_unlock(&wq->mutex); - } + list_for_each_entry(wq, &workqueues, list) + wq_adjust_pwqs_max_active(wq); =20 mutex_unlock(&wq_pool_mutex); } @@ -5773,7 +5771,6 @@ bool freeze_workqueues_busy(void) void thaw_workqueues(void) { struct workqueue_struct *wq; - struct pool_workqueue *pwq; =20 mutex_lock(&wq_pool_mutex); =20 @@ -5783,12 +5780,8 @@ void thaw_workqueues(void) workqueue_freezing =3D false; =20 /* restore max_active and repopulate worklist */ - list_for_each_entry(wq, &workqueues, list) { - mutex_lock(&wq->mutex); - for_each_pwq(pwq, wq) - pwq_adjust_max_active(pwq); - mutex_unlock(&wq->mutex); - } + list_for_each_entry(wq, &workqueues, list) + wq_adjust_pwqs_max_active(wq); =20 out_unlock: mutex_unlock(&wq_pool_mutex); --=20 2.19.1.6.gb485710b From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pl1-f170.google.com (mail-pl1-f170.google.com [209.85.214.170]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 349484655F for ; Wed, 27 Dec 2023 14:50:02 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; 
dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="cHaVO3hL" Received: by mail-pl1-f170.google.com with SMTP id d9443c01a7336-1d422c9f894so30418925ad.3 for ; Wed, 27 Dec 2023 06:50:02 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688602; x=1704293402; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=5JgQdBp97fYnSauGF41qAWZ2C5akEqSt+yyiFx6LPIY=; b=cHaVO3hLRUlKmmjxZcKzlxdAb0fd8WomUt9kUGiEdQIZOtWsAPr9xWW10JZgjThK+V X7NCgikZviCJPitmvWoG50SBwP6T8jX5rjLcJnpfZ9Azp7+GxEtPIfOSh+Qy+NNjNc8L YHmw/YpjtTlMR8MjV2wK9BgoTBAQxhjX8lMTBFj61u8Ckwxrh0IAmO/1C6+VXDKJ1Rv6 G/n3432z13SjnYRUz4qL7lRlpTFLx91LGJdZ/JSZK0fygafyapradebo2wSpaJxpVo7S uSLaIa8HVaoj7fB7CFHJyAN66w4jyVlfaHu1Hfs2QFCEGQJOnmD2myQ4PczsIsYDuU9F qFhQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688602; x=1704293402; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=5JgQdBp97fYnSauGF41qAWZ2C5akEqSt+yyiFx6LPIY=; b=RaHSQ+tu5wR3+aKzJgnLMjL54iPxfbHRE2SB1aZ+GM9xEFM7JuAS5iBIHUn1rUelj8 HAZY62LwU8yhuCWIxW4lReuwGdJr0WTxaUS0pYq2MG6/+45Hur+u5BF+0irjd2+fMENS QUmbzHokcHH2eFaKRtZoF7MyHuZUDZlofvyuqhq9zXGvmSroH/lWwLVat++MFKJBdtSc s6rbOW+1IUMQefcweE951TjbkJs2/65KGgWlhx6vfbYAhjdXYIzaGktP8wsVvYn/GU+L ea373FqrxicUrwI8ZFpi9VcieL+wwNjFYT4bUbpNL/JczTQco1dkWSjgMfdsgJJRXlQR hTfw== X-Gm-Message-State: AOJu0YxUFkXBOxsUwaIho7hC/q4W5MGuuRkgSxMpRlHghiAlE6mM2zBo Odma2oVXzkHRIfFvb+L66TNo5XbPv+s= X-Google-Smtp-Source: AGHT+IFAV9fT4/UlYoIxvVmVvRqXhdm68xNmrtrB/CayWXP6x28lq8flOhfvb7GMHeix2K8x1fM0/A== X-Received: by 2002:a17:902:bf42:b0:1d3:bedd:ad with SMTP id u2-20020a170902bf4200b001d3bedd00admr9265671pls.35.1703688602065; Wed, 27 Dec 2023 06:50:02 -0800 (PST) Received: from localhost ([198.11.176.14]) 
by smtp.gmail.com with ESMTPSA id r22-20020a170902be1600b001d3aa7604c5sm9806269pls.0.2023.12.27.06.50.01 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 06:50:01 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan Subject: [PATCH 5/7] workqueue: Adjust pwq's max_active when CPU online/offline Date: Wed, 27 Dec 2023 22:51:41 +0800 Message-Id: <20231227145143.2399-6-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: <20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Lai Jiangshan pwq->max_active is going to be set based on the CPU online distribution, which might change when a CPU goes online or offline. Call into wq_adjust_pwqs_max_active() to update them when needed. Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e0101b2b5fa3..d1c671597289 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5590,10 +5590,15 @@ int workqueue_online_cpu(unsigned int cpu) mutex_unlock(&wq_pool_attach_mutex); } =20 - /* update pod affinity of unbound workqueues */ + /* + * Update pod affinity of unbound workqueues, and update max_active + * for PWQs of all pods due to CPU online distribution changed. 
+ */ list_for_each_entry(wq, &workqueues, list) { - if (wq->unbound_attrs) + if (wq->unbound_attrs) { wq_update_pod(wq, cpu, true); + wq_adjust_pwqs_max_active(wq); + } } =20 mutex_unlock(&wq_pool_mutex); @@ -5610,11 +5615,16 @@ int workqueue_offline_cpu(unsigned int cpu) =20 unbind_workers(cpu); =20 - /* update pod affinity of unbound workqueues */ + /* + * Update pod affinity of unbound workqueues, and update max_active + * for PWQs of all pods due to CPU online distribution changed. + */ mutex_lock(&wq_pool_mutex); list_for_each_entry(wq, &workqueues, list) { - if (wq->unbound_attrs) + if (wq->unbound_attrs) { wq_update_pod(wq, cpu, false); + wq_adjust_pwqs_max_active(wq); + } } mutex_unlock(&wq_pool_mutex); =20 --=20 2.19.1.6.gb485710b From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pf1-f179.google.com (mail-pf1-f179.google.com [209.85.210.179]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CC34F47777 for ; Wed, 27 Dec 2023 14:50:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="Ssqk1lF/" Received: by mail-pf1-f179.google.com with SMTP id d2e1a72fcca58-6d98f6e8de1so1241294b3a.0 for ; Wed, 27 Dec 2023 06:50:08 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688608; x=1704293408; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=StLDtPNe1T869+urW8aS/ebeNeq1NbYI1cn0RgdFPCE=; b=Ssqk1lF/16qoiO+qRT2oRHmn6TdXNs0jB0PBOGGticxkHdOWWcNCOIIvTK1AuDJmnY Vdnfc+Hg03BfCey6ODBtG0kxuVXjlnsS4t/OI1Oi7XU/fj0haFFDEFsfM2ZfY3dFvgnk 
qVRTWEWuPD+gu1hdWux5209mlUbd5XLMFmiJ69e7U7SZ0IYkV3M9NWFJR4pWWSTlN1sn FSuIMJ1fU5PEmqtREw02sz/kZFBVJ69K+wzyS62SMbZ6JGAnzShUlxhtmXroXdUz2wf0 uSRCXARwo4icgM3JIR7u6MJVJqcOVaiaCxHpAkiKhttFgb/ktpdv64olkXmIpjDSIQb3 abcw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688608; x=1704293408; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=StLDtPNe1T869+urW8aS/ebeNeq1NbYI1cn0RgdFPCE=; b=lWveV/LQffALVziaUkBXyLpxwUxNvO/BGUjpx/QSpMp564QW2vI6VhbWV3/G6eJ6z+ 5IkDF8gQ/blhERDITN4cJDJVUBRcLTmHb3N9QYrIoZJIvsEVXuR8Sw6QeUQ2jlX0GxIS yzN//8b9WU0WGKxcbL1Xsz3lkftkdo38oL9KlKup/mFs/20ZGnlmb0zlkuXEB3nLXvbI aWV+sRifcPOqRuPjuANbDlRq/OvzqtBuYUBBBmG7WpCqNYQpSug6X7zKvwW8GTYu5tVc l7ifdkoEokw3HJXhihci9exROEz/4eBMtFgTL0CxZ3SdGZCf4fzcqwPUX7dPNJ3fzdph RZ5g== X-Gm-Message-State: AOJu0YyguvL9bVpn5j/DiUfu3LWuaCYbErMdjrthp9efZUjcxnXzqQ8v AlwDT92Fzhkwr8JRiujj0LE3tlZHB0s= X-Google-Smtp-Source: AGHT+IHgr1p6PzXqZZTbMHVJqj/3u94YgJU5urmTIOrK4Fr1tJ7RrU8OUX1lUsUHQOBbLZvM1+8hqQ== X-Received: by 2002:a05:6a20:38a5:b0:187:67b0:73e with SMTP id n37-20020a056a2038a500b0018767b0073emr3304953pzf.14.1703688607725; Wed, 27 Dec 2023 06:50:07 -0800 (PST) Received: from localhost ([47.89.225.180]) by smtp.gmail.com with ESMTPSA id d14-20020a170902aa8e00b001bf52834696sm11989663plr.207.2023.12.27.06.50.05 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 06:50:06 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan , Dennis Dalessandro Subject: [PATCH 6/7] workqueue: Implement system-wide max_active enforcement for unbound workqueues Date: Wed, 27 Dec 2023 22:51:42 +0800 Message-Id: <20231227145143.2399-7-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: 
<20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Tejun Heo A pool_workqueue (pwq) represents the connection between a workqueue and a worker_pool. One of the roles that a pwq plays is enforcement of the max_active concurrency limit. Before 636b927eba5b ("workqueue: Make unbound workqueues to use per-cpu pool_workqueues"), there was one pwq per each CPU for per-cpu workqueues and per each NUMA node for unbound workqueues, which was a natural result of per-cpu workqueues being served by per-cpu pools and unbound by per-NUMA pools. In terms of max_active enforcement, this was, while not perfect, workable. For per-cpu workqueues, it was fine. For unbound, it wasn't great in that NUMA machines would get max_active that's multiplied by the number of nodes but didn't cause huge problems because NUMA machines are relatively rare and the node count is usually pretty low. However, cache layouts are more complex now and sharing a PWQ across a whole node didn't really work well for unbound workqueues. Thus, a series of commits culminating on 8639ecebc9b1 ("workqueue: Make unbound workqueues to use per-cpu pool_workqueues") implemented more flexible affinity mechanism for unbound workqueues which enables using e.g. last-level-cache aligned pools. In the process, 636b927eba5b ("workqueue: Make unbound workqueues to use per-cpu pool_workqueues") made unbound workqueues use per-cpu pwqs like per-cpu workqueues. While the change was necessary to enable more flexible affinity scopes, this came with the side effect of blowing up the effective max_active for unbound workqueues. Before, the effective max_active for unbound workqueues was multiplied by the number of nodes. After, by the number of CPUs. 
636b927eba5b ("workqueue: Make unbound workqueues to use per-cpu pool_workqueues") claims that this should generally be okay. It is okay for users that self-regulate their concurrency level, which are the vast majority; however, there are enough use cases which actually depend on max_active to prevent the level of concurrency from going bonkers, including several IO handling workqueues that can issue a work item for each in-flight IO. With targeted benchmarks, the misbehavior can easily be exposed as reported in http://lkml.kernel.org/r/dbu6wiwu3sdhmhikb2w6lns7b27gbobfavhjj57kwi2quafgwl@htjcc5oikcr3. Unfortunately, there is no way to express what these use cases need using per-cpu max_active. A CPU may issue most of the in-flight IOs, so we don't want to set max_active too low, but as soon as we increase max_active a bit, we can end up with an unreasonable number of in-flight work items when many CPUs issue IOs at the same time. i.e. the acceptable lowest max_active is higher than the acceptable highest max_active. Ideally, max_active for an unbound workqueue should be system-wide so that the users can regulate the total level of concurrency regardless of node and cache layout. The reasons workqueue hasn't implemented that yet are: - Once max_active enforcement decouples from pool boundaries, chaining execution after a work item finishes requires inter-pool operations which would require lock dancing, which is nasty. - Sharing a single nr_active count across the whole system can be pretty expensive on NUMA machines. - Per-pwq enforcement had been more or less okay while we were using per-node pools. Instead of enforcing max_active system-wide across all PWQs, this patch distributes max_active among pods based on a previous patch that changes per-cpu PWQ to per-pod PWQ. With per-pod PWQ, max_active is distributed into each PWQ based on the proportion of online CPUs in a PWQ to the total system's online CPU count.
- Using per-pod PWQ max_active enforcement avoids sharing a single counter across multiple worker_pools and avoids complicating the locking mechanism. - Workqueue used to be able to process a chain of interdependent work items which is as long as max_active. We can't do this anymore as max_active is distributed across the pods. Instead, a new parameter min_active is introduced which determines the minimum level of concurrency within a pod regardless of how the max_active distribution comes out to be. It is set to the smaller of max_active and WQ_DFL_MIN_ACTIVE which is 8. This can lead to a higher effective max_active than configured and also to deadlocks if a workqueue was depending on being able to handle chains of interdependent work items that are longer than 8. If either case happens, we'll need to add an interface to adjust min_active, and users will be required to adjust affinity manually. A higher effective max_active can happen due to: - uninstalled PWQs. They will be gone once they have finished all their pending work items. - default PWQ. It is normally dormant unless it is the sole active PWQ. - div round up. It can cause the effective max_active to exceed the configured value by at most nr_pods-1. - clamp up to min_active. It can cause the effective max_active to be at least min_active*nr_pods. Signed-off-by: Tejun Heo Reported-by: Naohiro Aota Link: http://lkml.kernel.org/r/dbu6wiwu3sdhmhikb2w6lns7b27gbobfavhjj57kwi2quafgwl@htjcc5oikcr3 Fixes: 636b927eba5b ("workqueue: Make unbound workqueues to use per-cpu pool_workqueues") Signed-off-by: Lai Jiangshan --- include/linux/workqueue.h | 34 +++++++++++++++++++++++++++++++--- kernel/workqueue.c | 28 ++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 24b1e5070f4d..4ba2554f71a2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -405,6 +405,13 @@ enum { WQ_MAX_ACTIVE =3D 512, /* I like 512, better ideas?
*/ WQ_UNBOUND_MAX_ACTIVE =3D WQ_MAX_ACTIVE, WQ_DFL_ACTIVE =3D WQ_MAX_ACTIVE / 2, + + /* + * Per-PWQ default cap on min_active. Unless explicitly set, min_active + * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see + * workqueue_struct->min_active definition. + */ + WQ_DFL_MIN_ACTIVE =3D 8, }; =20 /* @@ -447,11 +454,32 @@ extern struct workqueue_struct *system_freezable_powe= r_efficient_wq; * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags - * @max_active: max in-flight work items per CPU, 0 for default + * @max_active: max in-flight work items, 0 for default * remaining args: args for @fmt * - * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to + * For a per-cpu workqueue, @max_active limits the number of in-flight work + * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be + * executing at most one work item for the workqueue. + * + * For unbound workqueues, @max_active limits the number of in-flight work= items + * for the whole system. e.g. @max_active of 16 indicates that that there = can be + * at most 16 work items executing for the workqueue in the whole system. + * + * As sharing the same active counter for an unbound workqueue across mult= iple + * PWQs can be expensive, @max_active is distributed to each PWQ according + * to the proportion of the number of online CPUs and enforced independent= ly. + * + * Depending on online CPU distribution, a PWQ may end up with assigned + * max_active which is significantly lower than @max_active, which can lea= d to + * deadlocks if the concurrency limit is lower than the maximum number + * of interdependent work items for the workqueue. 
+ * + * To guarantee forward progress regardless of online CPU distribution, the + * concurrency limit on every PWQ is guaranteed to be equal to or greater = than + * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This m= eans + * that the sum of per-PWQ max_active's may be larger than @max_active. + * + * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d1c671597289..382c53f89cb4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -298,7 +298,8 @@ struct workqueue_struct { struct worker *rescuer; /* MD: rescue worker */ =20 int nr_drainers; /* WQ: drain in progress */ - int saved_max_active; /* WQ: saved pwq max_active */ + int saved_max_active; /* WQ: saved max_active */ + int min_active; /* WQ: pwq min_active */ =20 struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */ @@ -4140,10 +4141,15 @@ static void pwq_release_workfn(struct kthread_work = *work) * pwq_calculate_max_active - Determine max_active to use * @pwq: pool_workqueue of interest * - * Determine the max_active @pwq should use. + * Determine the max_active @pwq should use based on the proportion of + * online CPUs in the @pwq to the total system's online CPU count if + * @pwq->wq is unbound. 
*/ static int pwq_calculate_max_active(struct pool_workqueue *pwq) { + int pwq_nr_online_cpus; + int max_active; + /* * During [un]freezing, the caller is responsible for ensuring * that pwq_adjust_max_active() is called at least once after @@ -4152,7 +4158,18 @@ static int pwq_calculate_max_active(struct pool_work= queue *pwq) if ((pwq->wq->flags & WQ_FREEZABLE) && workqueue_freezing) return 0; =20 - return pwq->wq->saved_max_active; + if (!(pwq->wq->flags & WQ_UNBOUND)) + return pwq->wq->saved_max_active; + + pwq_nr_online_cpus =3D cpumask_weight_and(pwq->pool->attrs->__pod_cpumask= , cpu_online_mask); + max_active =3D DIV_ROUND_UP(pwq->wq->saved_max_active * pwq_nr_online_cpu= s, num_online_cpus()); + + /* + * To guarantee forward progress regardless of online CPU distribution, + * the concurrency limit on every pwq is guaranteed to be equal to or + * greater than wq->min_active. + */ + return clamp(max_active, pwq->wq->min_active, pwq->wq->saved_max_active); } =20 /** @@ -4745,6 +4762,7 @@ struct workqueue_struct *alloc_workqueue(const char *= fmt, /* init wq */ wq->flags =3D flags; wq->saved_max_active =3D max_active; + wq->min_active =3D min(max_active, WQ_DFL_MIN_ACTIVE); mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->pwqs); @@ -4898,7 +4916,8 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); * @wq: target workqueue * @max_active: new max_active value. * - * Set max_active of @wq to @max_active. + * Set max_active of @wq to @max_active. See the alloc_workqueue() function + * comment. * * CONTEXT: * Don't call from IRQ context. 
@@ -4917,6 +4936,7 @@ void workqueue_set_max_active(struct workqueue_struct= *wq, int max_active) =20 wq->flags &=3D ~__WQ_ORDERED; wq->saved_max_active =3D max_active; + wq->min_active =3D min(wq->min_active, max_active); =20 for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); --=20 2.19.1.6.gb485710b From nobody Fri Dec 26 23:19:35 2025 Received: from mail-pg1-f170.google.com (mail-pg1-f170.google.com [209.85.215.170]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E997847F4D for ; Wed, 27 Dec 2023 14:50:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="ajXoVreg" Received: by mail-pg1-f170.google.com with SMTP id 41be03b00d2f7-5cd5cdba609so3954685a12.0 for ; Wed, 27 Dec 2023 06:50:12 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1703688612; x=1704293412; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=dBhAD2KkbriEp2qNC+ghO6AuutHK13Eu3XnJAL8S1+o=; b=ajXoVregn/SHzWsoS+oENP/XcxH1cNJ/lhGGiGn+u8OoZ6nZhZdeu0Y+3ZsbwkCtAa FfsQHWn1kNFAWkcaKbR2AUYoiz+eG0bvc/X5VRz2iIAe/H4ksDuK8S3HGca5+mzKC6SQ lck+4mY2rwWqx4i4JckP4cC48rTklsRxyMp+5HToR0zOuUrQndWcRQw56RSxRkPdjo3q STaqbgPbxNcsj9flTvZg/QWhfV4pta7C8m+vMjXJybcHwr/JDnjpjeKR293koDQ9J6nb XlEAFmIxfwk4bse4Ktv8psX6HsmDdyhtF02sG9Tq0xT/uozeeoGRVehR7FRqtLouiVnZ xy1Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1703688612; x=1704293412; h=content-transfer-encoding:mime-version:references:in-reply-to 
:message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=dBhAD2KkbriEp2qNC+ghO6AuutHK13Eu3XnJAL8S1+o=; b=Q8Y+cmxAmCWtW/AteV0y/tBCmL9ajbPBnaKF9IQBTFX8on8sx+ZXXB6woQYyO7UWF2 1f3IyAshWhrIrezuF4o7sAG9axyUz6fMuvSLTLa4N6itZ1ZjqVOE0X2PHBMCUjtcLgqx Z4hoYUQXufEpyfiMjH4PK7bWdYJiN+mm213ekVh2ebWJivKyPV+180xV6eMCT+ie1KS1 ryp7GHq51EUj1WtOnp8x7HpdVhiSj1SZ5QrIgvgF+Ybd889ObfzCnY7msvs0xG3fV50E NRymNhx4euPINAhhYlHF2kOX5p0B/50CSGFe30XikQmLGZy4og4gWw3mQsm4TL4tXRBB Nrcw== X-Gm-Message-State: AOJu0YzcO+jCuEt3tO3mT4bCRBustoUdyb/npowYxLgGbpwJAkgWmD5e Vn/JuhLeF0nB1DBEm++Ijo4tD1x3CV0= X-Google-Smtp-Source: AGHT+IEKVvwlf1bFhDODYLApQeMtBwkhw43sIPpWdIdPa1YiE1pczTAvikL0QjsZI/5Hv8dk2/k35A== X-Received: by 2002:a05:6a21:99a9:b0:196:16b0:b7b3 with SMTP id ve41-20020a056a2199a900b0019616b0b7b3mr1480281pzb.78.1703688612010; Wed, 27 Dec 2023 06:50:12 -0800 (PST) Received: from localhost ([198.11.176.14]) by smtp.gmail.com with ESMTPSA id f6-20020a056a001ac600b006d9a48882f7sm7651268pfv.118.2023.12.27.06.50.11 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Wed, 27 Dec 2023 06:50:11 -0800 (PST) From: Lai Jiangshan To: linux-kernel@vger.kernel.org Cc: Tejun Heo , Naohiro.Aota@wdc.com, Lai Jiangshan , Lai Jiangshan Subject: [PATCH 7/7] workqueue: Rename wq->saved_max_active to wq->max_active Date: Wed, 27 Dec 2023 22:51:43 +0800 Message-Id: <20231227145143.2399-8-jiangshanlai@gmail.com> X-Mailer: git-send-email 2.19.1.6.gb485710b In-Reply-To: <20231227145143.2399-1-jiangshanlai@gmail.com> References: <20231227145143.2399-1-jiangshanlai@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Lai Jiangshan The name max_active is clearer. 
Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 382c53f89cb4..0458545642f7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -298,7 +298,7 @@ struct workqueue_struct { struct worker *rescuer; /* MD: rescue worker */ =20 int nr_drainers; /* WQ: drain in progress */ - int saved_max_active; /* WQ: saved max_active */ + int max_active; /* WQ: percpu or total max_active */ int min_active; /* WQ: pwq min_active */ =20 struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ @@ -3376,7 +3376,7 @@ static bool start_flush_work(struct work_struct *work= , struct wq_barrier *barr, * forward progress. */ if (!from_cancel && - (pwq->wq->saved_max_active =3D=3D 1 || pwq->wq->rescuer)) { + (pwq->wq->max_active =3D=3D 1 || pwq->wq->rescuer)) { lock_map_acquire(&pwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map); } @@ -4159,17 +4159,17 @@ static int pwq_calculate_max_active(struct pool_wor= kqueue *pwq) return 0; =20 if (!(pwq->wq->flags & WQ_UNBOUND)) - return pwq->wq->saved_max_active; + return pwq->wq->max_active; =20 pwq_nr_online_cpus =3D cpumask_weight_and(pwq->pool->attrs->__pod_cpumask= , cpu_online_mask); - max_active =3D DIV_ROUND_UP(pwq->wq->saved_max_active * pwq_nr_online_cpu= s, num_online_cpus()); + max_active =3D DIV_ROUND_UP(pwq->wq->max_active * pwq_nr_online_cpus, num= _online_cpus()); =20 /* * To guarantee forward progress regardless of online CPU distribution, * the concurrency limit on every pwq is guaranteed to be equal to or * greater than wq->min_active. 
*/ - return clamp(max_active, pwq->wq->min_active, pwq->wq->saved_max_active); + return clamp(max_active, pwq->wq->min_active, pwq->wq->max_active); } =20 /** @@ -4177,7 +4177,7 @@ static int pwq_calculate_max_active(struct pool_workq= ueue *pwq) * @pwq: target pool_workqueue * * If @pwq isn't freezing, set @pwq->max_active to the associated - * workqueue's saved_max_active and activate inactive work items + * workqueue's max_active and activate inactive work items * accordingly. If @pwq is freezing, clear @pwq->max_active to zero. */ static void pwq_adjust_max_active(struct pool_workqueue *pwq) @@ -4186,7 +4186,7 @@ static void pwq_adjust_max_active(struct pool_workque= ue *pwq) int max_active =3D pwq_calculate_max_active(pwq); unsigned long flags; =20 - /* for @wq->saved_max_active */ + /* for @wq->max_active */ lockdep_assert_held(&wq->mutex); =20 /* fast exit if unchanged */ @@ -4761,7 +4761,7 @@ struct workqueue_struct *alloc_workqueue(const char *= fmt, =20 /* init wq */ wq->flags =3D flags; - wq->saved_max_active =3D max_active; + wq->max_active =3D max_active; wq->min_active =3D min(max_active, WQ_DFL_MIN_ACTIVE); mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); @@ -4935,7 +4935,7 @@ void workqueue_set_max_active(struct workqueue_struct= *wq, int max_active) mutex_lock(&wq->mutex); =20 wq->flags &=3D ~__WQ_ORDERED; - wq->saved_max_active =3D max_active; + wq->max_active =3D max_active; wq->min_active =3D min(wq->min_active, max_active); =20 for_each_pwq(pwq, wq) @@ -5990,7 +5990,7 @@ static ssize_t max_active_show(struct device *dev, { struct workqueue_struct *wq =3D dev_to_wq(dev); =20 - return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); + return scnprintf(buf, PAGE_SIZE, "%d\n", wq->max_active); } =20 static ssize_t max_active_store(struct device *dev, --=20 2.19.1.6.gb485710b