From nobody Fri Dec 26 19:23:15 2025 From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E .
McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko , Christoph Hellwig Subject: [PATCH v3 01/11] mm: vmalloc: Add va_alloc() helper Date: Tue, 2 Jan 2024 19:46:23 +0100 Message-Id: <20240102184633.748113-2-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Currently __alloc_vmap_area() function contains an open codded logic that finds and adjusts a VA based on allocation request. Introduce a va_alloc() helper that adjusts found VA only. There is no a functional change as a result of this patch. Reviewed-by: Baoquan He Reviewed-by: Christoph Hellwig Reviewed-by: Lorenzo Stoakes Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d12a17fc0c17..739401a9eafc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1481,6 +1481,32 @@ adjust_va_to_fit_type(struct rb_root *root, struct l= ist_head *head, return 0; } =20 +static unsigned long +va_alloc(struct vmap_area *va, + struct rb_root *root, struct list_head *head, + unsigned long size, unsigned long align, + unsigned long vstart, unsigned long vend) +{ + unsigned long nva_start_addr; + int ret; + + if (va->va_start > vstart) + nva_start_addr =3D ALIGN(va->va_start, align); + else + nva_start_addr =3D ALIGN(vstart, align); + + /* Check the "vend" restriction. */ + if (nva_start_addr + size > vend) + return vend; + + /* Update the free vmap_area. */ + ret =3D adjust_va_to_fit_type(root, head, va, nva_start_addr, size); + if (WARN_ON_ONCE(ret)) + return vend; + + return nva_start_addr; +} + /* * Returns a start address of the newly allocated area, if success. * Otherwise a vend is returned that indicates failure. @@ -1493,7 +1519,6 @@ __alloc_vmap_area(struct rb_root *root, struct list_h= ead *head, bool adjust_search_size =3D true; unsigned long nva_start_addr; struct vmap_area *va; - int ret; =20 /* * Do not adjust when: @@ -1511,18 +1536,8 @@ __alloc_vmap_area(struct rb_root *root, struct list_= head *head, if (unlikely(!va)) return vend; =20 - if (va->va_start > vstart) - nva_start_addr =3D ALIGN(va->va_start, align); - else - nva_start_addr =3D ALIGN(vstart, align); - - /* Check the "vend" restriction. */ - if (nva_start_addr + size > vend) - return vend; - - /* Update the free vmap_area. 
*/ - ret =3D adjust_va_to_fit_type(root, head, va, nva_start_addr, size); - if (WARN_ON_ONCE(ret)) + nva_start_addr =3D va_alloc(va, root, head, size, align, vstart, vend); + if (nva_start_addr =3D=3D vend) return vend; =20 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E .
McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko , Christoph Hellwig Subject: [PATCH v3 02/11] mm: vmalloc: Rename adjust_va_to_fit_type() function Date: Tue, 2 Jan 2024 19:46:24 +0100 Message-Id: <20240102184633.748113-3-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This patch renames the adjust_va_to_fit_type() function to va_clip() which is shorter and more expressive. There is no a functional change as a result of this patch. Reviewed-by: Baoquan He Reviewed-by: Christoph Hellwig Reviewed-by: Lorenzo Stoakes Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 739401a9eafc..10f289e86512 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1382,9 +1382,9 @@ classify_va_fit_type(struct vmap_area *va, } =20 static __always_inline int -adjust_va_to_fit_type(struct rb_root *root, struct list_head *head, - struct vmap_area *va, unsigned long nva_start_addr, - unsigned long size) +va_clip(struct rb_root *root, struct list_head *head, + struct vmap_area *va, unsigned long nva_start_addr, + unsigned long size) { struct vmap_area *lva =3D NULL; enum fit_type type =3D classify_va_fit_type(va, nva_start_addr, size); @@ -1500,7 +1500,7 @@ va_alloc(struct vmap_area *va, return vend; =20 /* Update the free vmap_area. */ - ret =3D adjust_va_to_fit_type(root, head, va, nva_start_addr, size); + ret =3D va_clip(root, head, va, nva_start_addr, size); if (WARN_ON_ONCE(ret)) return vend; =20 @@ -4155,9 +4155,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned l= ong *offsets, /* It is a BUG(), but trigger recovery instead. */ goto recovery; =20 - ret =3D adjust_va_to_fit_type(&free_vmap_area_root, - &free_vmap_area_list, - va, start, size); + ret =3D va_clip(&free_vmap_area_root, + &free_vmap_area_list, va, start, size); if (WARN_ON_ONCE(unlikely(ret))) /* It is a BUG(), but trigger recovery instead. 
*/ goto recovery; --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E .
McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko , Christoph Hellwig Subject: [PATCH v3 03/11] mm: vmalloc: Move vmap_init_free_space() down in vmalloc.c Date: Tue, 2 Jan 2024 19:46:25 +0100 Message-Id: <20240102184633.748113-4-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" A vmap_init_free_space() is a function that setups a vmap space and is considered as part of initialization phase. Since a main entry which is vmalloc_init(), has been moved down in vmalloc.c it makes sense to follow the pattern. There is no a functional change as a result of this patch. Reviewed-by: Baoquan He Reviewed-by: Christoph Hellwig Reviewed-by: Lorenzo Stoakes Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 82 ++++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 10f289e86512..06bd843d18ae 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2512,47 +2512,6 @@ void __init vm_area_register_early(struct vm_struct = *vm, size_t align) kasan_populate_early_vm_area_shadow(vm->addr, vm->size); } =20 -static void vmap_init_free_space(void) -{ - unsigned long vmap_start =3D 1; - const unsigned long vmap_end =3D ULONG_MAX; - struct vmap_area *busy, *free; - - /* - * B F B B B F - * -|-----|.....|-----|-----|-----|.....|- - * | The KVA space | - * |<--------------------------------->| - */ - list_for_each_entry(busy, &vmap_area_list, list) { - if (busy->va_start - vmap_start > 0) { - free =3D kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); - if (!WARN_ON_ONCE(!free)) { - free->va_start =3D vmap_start; - free->va_end =3D busy->va_start; - - insert_vmap_area_augment(free, NULL, - &free_vmap_area_root, - &free_vmap_area_list); - } - } - - vmap_start =3D busy->va_end; - } - - if (vmap_end - vmap_start > 0) { - free =3D kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); - if (!WARN_ON_ONCE(!free)) { - free->va_start =3D vmap_start; - free->va_end =3D vmap_end; - - insert_vmap_area_augment(free, NULL, - &free_vmap_area_root, - &free_vmap_area_list); - } - } -} - static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { @@ -4465,6 +4424,47 @@ module_init(proc_vmalloc_init); =20 #endif =20 +static void vmap_init_free_space(void) +{ + unsigned long vmap_start =3D 1; + const unsigned long vmap_end =3D ULONG_MAX; + struct vmap_area *busy, *free; + + /* + * B F B B B F + * -|-----|.....|-----|-----|-----|.....|- + * | The KVA space | + * |<--------------------------------->| + */ + list_for_each_entry(busy, &vmap_area_list, list) { + if (busy->va_start - vmap_start > 0) { + free =3D kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); + if (!WARN_ON_ONCE(!free)) { + free->va_start =3D vmap_start; + free->va_end =3D busy->va_start; + + insert_vmap_area_augment(free, NULL, + &free_vmap_area_root, + &free_vmap_area_list); + } + } + + vmap_start =3D busy->va_end; + } + + if (vmap_end - vmap_start > 0) { + free =3D kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); + if (!WARN_ON_ONCE(!free)) { + free->va_start =3D vmap_start; + free->va_end =3D vmap_end; + + insert_vmap_area_augment(free, NULL, + &free_vmap_area_root, + 
&free_vmap_area_list); + } + } +} + void __init vmalloc_init(void) { struct vmap_area *va; --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E .
McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 04/11] mm: vmalloc: Remove global vmap_area_root rb-tree Date: Tue, 2 Jan 2024 19:46:26 +0100 Message-Id: <20240102184633.748113-5-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Store allocated objects in a separate nodes. A va->va_start address is converted into a correct node where it should be placed and resided. An addr_to_node() function is used to do a proper address conversion to determine a node that contains a VA. Such approach balances VAs across nodes as a result an access becomes scalable. Number of nodes in a system depends on number of CPUs. Please note: 1. As of now allocated VAs are bound to a node-0. It means the patch does not give any difference comparing with a current behavior; 2. The global vmap_area_lock, vmap_area_root are removed as there is no need in it anymore. The vmap_area_list is still kept and is _empty_. It is exported for a kexec only; 3. The vmallocinfo and vread() have to be reworked to be able to handle multiple nodes. Reviewed-by: Baoquan He Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Lorenzo Stoakes --- mm/vmalloc.c | 240 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 173 insertions(+), 67 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 06bd843d18ae..786ecb18ae22 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -728,11 +728,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn); #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0 =20 =20 -static DEFINE_SPINLOCK(vmap_area_lock); static DEFINE_SPINLOCK(free_vmap_area_lock); /* Export for kexec only */ LIST_HEAD(vmap_area_list); -static struct rb_root vmap_area_root =3D RB_ROOT; static bool vmap_initialized __read_mostly; =20 static struct rb_root purge_vmap_area_root =3D RB_ROOT; @@ -772,6 +770,38 @@ static struct rb_root free_vmap_area_root =3D RB_ROOT; */ static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node); =20 +/* + * An effective vmap-node logic. Users make use of nodes instead + * of a global heap. It allows to balance an access and mitigate + * contention. + */ +struct rb_list { + struct rb_root root; + struct list_head head; + spinlock_t lock; +}; + +static struct vmap_node { + /* Bookkeeping data of this node. */ + struct rb_list busy; +} single; + +static struct vmap_node *vmap_nodes =3D &single; +static __read_mostly unsigned int nr_vmap_nodes =3D 1; +static __read_mostly unsigned int vmap_zone_size =3D 1; + +static inline unsigned int +addr_to_node_id(unsigned long addr) +{ + return (addr / vmap_zone_size) % nr_vmap_nodes; +} + +static inline struct vmap_node * +addr_to_node(unsigned long addr) +{ + return &vmap_nodes[addr_to_node_id(addr)]; +} + static __always_inline unsigned long va_size(struct vmap_area *va) { @@ -803,10 +833,11 @@ unsigned long vmalloc_nr_pages(void) } =20 /* Look up the first VA which satisfies addr < va_end, NULL if none. 
*/ -static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr) +static struct vmap_area * +find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) { struct vmap_area *va =3D NULL; - struct rb_node *n =3D vmap_area_root.rb_node; + struct rb_node *n =3D root->rb_node; =20 addr =3D (unsigned long)kasan_reset_tag((void *)addr); =20 @@ -1552,12 +1583,14 @@ __alloc_vmap_area(struct rb_root *root, struct list= _head *head, */ static void free_vmap_area(struct vmap_area *va) { + struct vmap_node *vn =3D addr_to_node(va->va_start); + /* * Remove from the busy tree/list. */ - spin_lock(&vmap_area_lock); - unlink_va(va, &vmap_area_root); - spin_unlock(&vmap_area_lock); + spin_lock(&vn->busy.lock); + unlink_va(va, &vn->busy.root); + spin_unlock(&vn->busy.lock); =20 /* * Insert/Merge it back to the free tree/list. @@ -1600,6 +1633,7 @@ static struct vmap_area *alloc_vmap_area(unsigned lon= g size, int node, gfp_t gfp_mask, unsigned long va_flags) { + struct vmap_node *vn; struct vmap_area *va; unsigned long freed; unsigned long addr; @@ -1645,9 +1679,11 @@ static struct vmap_area *alloc_vmap_area(unsigned lo= ng size, va->vm =3D NULL; va->flags =3D va_flags; =20 - spin_lock(&vmap_area_lock); - insert_vmap_area(va, &vmap_area_root, &vmap_area_list); - spin_unlock(&vmap_area_lock); + vn =3D addr_to_node(va->va_start); + + spin_lock(&vn->busy.lock); + insert_vmap_area(va, &vn->busy.root, &vn->busy.head); + spin_unlock(&vn->busy.lock); =20 BUG_ON(!IS_ALIGNED(va->va_start, align)); BUG_ON(va->va_start < vstart); @@ -1871,26 +1907,61 @@ static void free_unmap_vmap_area(struct vmap_area *= va) =20 struct vmap_area *find_vmap_area(unsigned long addr) { + struct vmap_node *vn; struct vmap_area *va; + int i, j; =20 - spin_lock(&vmap_area_lock); - va =3D __find_vmap_area(addr, &vmap_area_root); - spin_unlock(&vmap_area_lock); + /* + * An addr_to_node_id(addr) converts an address to a node index + * where a VA is located. If VA spans several zones and passed + * addr is not the same as va->va_start, what is not common, we + * may need to scan an extra nodes. See an example: + * + * <--va--> + * -|-----|-----|-----|-----|- + * 1 2 0 1 + * + * VA resides in node 1 whereas it spans 1 and 2. If passed + * addr is within a second node we should do extra work. We + * should mention that it is rare and is a corner case from + * the other hand it has to be covered. 
+ */ + i =3D j =3D addr_to_node_id(addr); + do { + vn =3D &vmap_nodes[i]; =20 - return va; + spin_lock(&vn->busy.lock); + va =3D __find_vmap_area(addr, &vn->busy.root); + spin_unlock(&vn->busy.lock); + + if (va) + return va; + } while ((i =3D (i + 1) % nr_vmap_nodes) !=3D j); + + return NULL; } =20 static struct vmap_area *find_unlink_vmap_area(unsigned long addr) { + struct vmap_node *vn; struct vmap_area *va; + int i, j; =20 - spin_lock(&vmap_area_lock); - va =3D __find_vmap_area(addr, &vmap_area_root); - if (va) - unlink_va(va, &vmap_area_root); - spin_unlock(&vmap_area_lock); + i =3D j =3D addr_to_node_id(addr); + do { + vn =3D &vmap_nodes[i]; =20 - return va; + spin_lock(&vn->busy.lock); + va =3D __find_vmap_area(addr, &vn->busy.root); + if (va) + unlink_va(va, &vn->busy.root); + spin_unlock(&vn->busy.lock); + + if (va) + return va; + } while ((i =3D (i + 1) % nr_vmap_nodes) !=3D j); + + return NULL; } =20 /*** Per cpu kva allocator ***/ @@ -2092,6 +2163,7 @@ static void *new_vmap_block(unsigned int order, gfp_t= gfp_mask) =20 static void free_vmap_block(struct vmap_block *vb) { + struct vmap_node *vn; struct vmap_block *tmp; struct xarray *xa; =20 @@ -2099,9 +2171,10 @@ static void free_vmap_block(struct vmap_block *vb) tmp =3D xa_erase(xa, addr_to_vb_idx(vb->va->va_start)); BUG_ON(tmp !=3D vb); =20 - spin_lock(&vmap_area_lock); - unlink_va(vb->va, &vmap_area_root); - spin_unlock(&vmap_area_lock); + vn =3D addr_to_node(vb->va->va_start); + spin_lock(&vn->busy.lock); + unlink_va(vb->va, &vn->busy.root); + spin_unlock(&vn->busy.lock); =20 free_vmap_area_noflush(vb->va); kfree_rcu(vb, rcu_head); @@ -2525,9 +2598,11 @@ static inline void setup_vmalloc_vm_locked(struct vm= _struct *vm, static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { - spin_lock(&vmap_area_lock); + struct vmap_node *vn =3D addr_to_node(va->va_start); + + spin_lock(&vn->busy.lock); setup_vmalloc_vm_locked(vm, va, flags, caller); - spin_unlock(&vmap_area_lock); + spin_unlock(&vn->busy.lock); } =20 static void clear_vm_uninitialized_flag(struct vm_struct *vm) @@ -3715,6 +3790,7 @@ static size_t vmap_ram_vread_iter(struct iov_iter *it= er, const char *addr, */ long vread_iter(struct iov_iter *iter, const char *addr, size_t count) { + struct vmap_node *vn; struct vmap_area *va; struct vm_struct *vm; char *vaddr; @@ -3728,8 +3804,11 @@ long vread_iter(struct iov_iter *iter, const char *a= ddr, size_t count) =20 remains =3D count; =20 - spin_lock(&vmap_area_lock); - va =3D find_vmap_area_exceed_addr((unsigned long)addr); + /* Hooked to node_0 so far. */ + vn =3D addr_to_node(0); + spin_lock(&vn->busy.lock); + + va =3D find_vmap_area_exceed_addr((unsigned long)addr, &vn->busy.root); if (!va) goto finished_zero; =20 @@ -3737,7 +3816,7 @@ long vread_iter(struct iov_iter *iter, const char *ad= dr, size_t count) if ((unsigned long)addr + remains <=3D va->va_start) goto finished_zero; =20 - list_for_each_entry_from(va, &vmap_area_list, list) { + list_for_each_entry_from(va, &vn->busy.head, list) { size_t copied; =20 if (remains =3D=3D 0) @@ -3796,12 +3875,12 @@ long vread_iter(struct iov_iter *iter, const char *= addr, size_t count) } =20 finished_zero: - spin_unlock(&vmap_area_lock); + spin_unlock(&vn->busy.lock); /* zero-fill memory holes */ return count - remains + zero_iter(iter, remains); finished: /* Nothing remains, or We couldn't copy/zero everything. 
*/ - spin_unlock(&vmap_area_lock); + spin_unlock(&vn->busy.lock); =20 return count - remains; } @@ -4135,14 +4214,15 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned= long *offsets, } =20 /* insert all vm's */ - spin_lock(&vmap_area_lock); for (area =3D 0; area < nr_vms; area++) { - insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list); + struct vmap_node *vn =3D addr_to_node(vas[area]->va_start); =20 + spin_lock(&vn->busy.lock); + insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head); setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, pcpu_get_vm_areas); + spin_unlock(&vn->busy.lock); } - spin_unlock(&vmap_area_lock); =20 /* * Mark allocated areas as accessible. Do it now as a best-effort @@ -4253,55 +4333,57 @@ bool vmalloc_dump_obj(void *object) { void *objp =3D (void *)PAGE_ALIGN((unsigned long)object); const void *caller; - struct vm_struct *vm; struct vmap_area *va; + struct vmap_node *vn; unsigned long addr; unsigned int nr_pages; + bool success =3D false; =20 - if (!spin_trylock(&vmap_area_lock)) - return false; - va =3D __find_vmap_area((unsigned long)objp, &vmap_area_root); - if (!va) { - spin_unlock(&vmap_area_lock); - return false; - } + vn =3D addr_to_node((unsigned long)objp); =20 - vm =3D va->vm; - if (!vm) { - spin_unlock(&vmap_area_lock); - return false; + if (spin_trylock(&vn->busy.lock)) { + va =3D __find_vmap_area(addr, &vn->busy.root); + + if (va && va->vm) { + addr =3D (unsigned long)va->vm->addr; + caller =3D va->vm->caller; + nr_pages =3D va->vm->nr_pages; + success =3D true; + } + + spin_unlock(&vn->busy.lock); } - addr =3D (unsigned long)vm->addr; - caller =3D vm->caller; - nr_pages =3D vm->nr_pages; - spin_unlock(&vmap_area_lock); - pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", - nr_pages, addr, caller); - return true; + + if (success) + pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", + nr_pages, addr, caller); + + return success; } #endif =20 #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) - __acquires(&vmap_purge_lock) - __acquires(&vmap_area_lock) { + struct vmap_node *vn =3D addr_to_node(0); + mutex_lock(&vmap_purge_lock); - spin_lock(&vmap_area_lock); + spin_lock(&vn->busy.lock); =20 - return seq_list_start(&vmap_area_list, *pos); + return seq_list_start(&vn->busy.head, *pos); } =20 static void *s_next(struct seq_file *m, void *p, loff_t *pos) { - return seq_list_next(p, &vmap_area_list, pos); + struct vmap_node *vn =3D addr_to_node(0); + return seq_list_next(p, &vn->busy.head, pos); } =20 static void s_stop(struct seq_file *m, void *p) - __releases(&vmap_area_lock) - __releases(&vmap_purge_lock) { - spin_unlock(&vmap_area_lock); + struct vmap_node *vn =3D addr_to_node(0); + + spin_unlock(&vn->busy.lock); mutex_unlock(&vmap_purge_lock); } =20 @@ -4344,9 +4426,11 @@ static void show_purge_info(struct seq_file *m) =20 static int s_show(struct seq_file *m, void *p) { + struct vmap_node *vn; struct vmap_area *va; struct vm_struct *v; =20 + vn =3D addr_to_node(0); va =3D list_entry(p, struct vmap_area, list); =20 if (!va->vm) { @@ -4397,7 +4481,7 @@ static int s_show(struct seq_file *m, void *p) * As a final step, dump "unpurged" areas. 
*/ final: - if (list_is_last(&va->list, &vmap_area_list)) + if (list_is_last(&va->list, &vn->busy.head)) show_purge_info(m); =20 return 0; @@ -4428,7 +4512,8 @@ static void vmap_init_free_space(void) { unsigned long vmap_start =3D 1; const unsigned long vmap_end =3D ULONG_MAX; - struct vmap_area *busy, *free; + struct vmap_area *free; + struct vm_struct *busy; =20 /* * B F B B B F @@ -4436,12 +4521,12 @@ static void vmap_init_free_space(void) * | The KVA space | * |<--------------------------------->| */ - list_for_each_entry(busy, &vmap_area_list, list) { - if (busy->va_start - vmap_start > 0) { + for (busy =3D vmlist; busy; busy =3D busy->next) { + if ((unsigned long) busy->addr - vmap_start > 0) { free =3D kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); if (!WARN_ON_ONCE(!free)) { free->va_start =3D vmap_start; - free->va_end =3D busy->va_start; + free->va_end =3D (unsigned long) busy->addr; =20 insert_vmap_area_augment(free, NULL, &free_vmap_area_root, @@ -4449,7 +4534,7 @@ static void vmap_init_free_space(void) } } =20 - vmap_start =3D busy->va_end; + vmap_start =3D (unsigned long) busy->addr + busy->size; } =20 if (vmap_end - vmap_start > 0) { @@ -4465,9 +4550,23 @@ static void vmap_init_free_space(void) } } =20 +static void vmap_init_nodes(void) +{ + struct vmap_node *vn; + int i; + + for (i =3D 0; i < nr_vmap_nodes; i++) { + vn =3D &vmap_nodes[i]; + vn->busy.root =3D RB_ROOT; + INIT_LIST_HEAD(&vn->busy.head); + spin_lock_init(&vn->busy.lock); + } +} + void __init vmalloc_init(void) { struct vmap_area *va; + struct vmap_node *vn; struct vm_struct *tmp; int i; =20 @@ -4489,6 +4588,11 @@ void __init vmalloc_init(void) xa_init(&vbq->vmap_blocks); } =20 + /* + * Setup nodes before importing vmlist. + */ + vmap_init_nodes(); + /* Import existing vmlist entries. 
*/ for (tmp =3D vmlist; tmp; tmp =3D tmp->next) { va =3D kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); @@ -4498,7 +4602,9 @@ void __init vmalloc_init(void) va->va_start =3D (unsigned long)tmp->addr; va->va_end =3D va->va_start + tmp->size; va->vm =3D tmp; - insert_vmap_area(va, &vmap_area_root, &vmap_area_list); + + vn =3D addr_to_node(va->va_start); + insert_vmap_area(va, &vn->busy.root, &vn->busy.head); } =20 /* --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E .
McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 05/11] mm/vmalloc: remove vmap_area_list Date: Tue, 2 Jan 2024 19:46:27 +0100 Message-Id: <20240102184633.748113-6-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Baoquan He Earlier, vmap_area_list is exported to vmcoreinfo so that makedumpfile get the base address of vmalloc area. Now, vmap_area_list is empty, so export VMALLOC_START to vmcoreinfo instead, and remove vmap_area_list. Signed-off-by: Baoquan He Signed-off-by: Uladzislau Rezki (Sony) Acked-by: Lorenzo Stoakes --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 8 ++++---- arch/arm64/kernel/crash_core.c | 1 - arch/riscv/kernel/crash_core.c | 1 - include/linux/vmalloc.h | 1 - kernel/crash_core.c | 4 +--- kernel/kallsyms_selftest.c | 1 - mm/nommu.c | 2 -- mm/vmalloc.c | 2 -- 8 files changed, 5 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation= /admin-guide/kdump/vmcoreinfo.rst index 78e4d2e7ba14..df54fbeaaa16 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -65,11 +65,11 @@ Defines the beginning of the text section. In general, = _stext indicates the kernel start address. Used to convert a virtual address from the direct kernel map to a physical address. =20 -vmap_area_list --------------- +VMALLOC_START +------------- =20 -Stores the virtual area list. makedumpfile gets the vmalloc start value -from this variable and its value is necessary for vmalloc translation. +Stores the base address of vmalloc area. makedumpfile gets this value +since is necessary for vmalloc translation. 
=20 mem_map ------- diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 66cde752cd74..2a24199a9b81 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -23,7 +23,6 @@ void arch_crash_save_vmcoreinfo(void) /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=3D0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=3D0x%lx\n", MODULES_END); - vmcoreinfo_append_str("NUMBER(VMALLOC_START)=3D0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=3D0x%lx\n", VMALLOC_END); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=3D0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=3D0x%lx\n", VMEMMAP_END); diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 8706736fd4e2..d18d529fd9b9 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -8,7 +8,6 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_NUMBER(phys_ram_base); =20 vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=3D0x%lx\n", PAGE_OFFSET); - vmcoreinfo_append_str("NUMBER(VMALLOC_START)=3D0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=3D0x%lx\n", VMALLOC_END); #ifdef CONFIG_MMU VMCOREINFO_NUMBER(VA_BITS); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c720be70c8dd..91810b4e9510 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -253,7 +253,6 @@ extern long vread_iter(struct iov_iter *iter, const cha= r *addr, size_t count); /* * Internals. Don't use.. */ -extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t ali= gn); =20 diff --git a/kernel/crash_core.c b/kernel/crash_core.c index d4313b53837e..b427f4a3b156 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -759,7 +759,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); #endif VMCOREINFO_SYMBOL(_stext); - VMCOREINFO_SYMBOL(vmap_area_list); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=3D0x%lx\n", VMALLOC_START); =20 #ifndef CONFIG_NUMA VMCOREINFO_SYMBOL(mem_map); @@ -800,8 +800,6 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(free_area, free_list); VMCOREINFO_OFFSET(list_head, next); VMCOREINFO_OFFSET(list_head, prev); - VMCOREINFO_OFFSET(vmap_area, va_start); - VMCOREINFO_OFFSET(vmap_area, list); VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1); log_buf_vmcoreinfo_setup(); VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index b4cac76ea5e9..8a689b4ff4f9 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -89,7 +89,6 @@ static struct test_item test_items[] =3D { ITEM_DATA(kallsyms_test_var_data_static), ITEM_DATA(kallsyms_test_var_bss), ITEM_DATA(kallsyms_test_var_data), - ITEM_DATA(vmap_area_list), #endif }; =20 diff --git a/mm/nommu.c b/mm/nommu.c index b6dc558d3144..5ec8f44e7ce9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -131,8 +131,6 @@ int follow_pfn(struct vm_area_struct *vma, unsigned lon= g address, } EXPORT_SYMBOL(follow_pfn); =20 -LIST_HEAD(vmap_area_list); - void vfree(const void *addr) { kfree(addr); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 786ecb18ae22..8c01f2225ef7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -729,8 +729,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn); =20 =20 static DEFINE_SPINLOCK(free_vmap_area_lock); -/* 
Export for kexec only */ -LIST_HEAD(vmap_area_list); static bool vmap_initialized __read_mostly; =20 static struct rb_root purge_vmap_area_root =3D RB_ROOT; --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E .
McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 06/11] mm: vmalloc: Remove global purge_vmap_area_root rb-tree Date: Tue, 2 Jan 2024 19:46:28 +0100 Message-Id: <20240102184633.748113-7-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Similar to busy VA, lazily-freed area is stored to a node it belongs to. Such approach does not require any global locking primitive, instead an access becomes scalable what mitigates a contention. This patch removes a global purge-lock, global purge-tree and global purge list. Reviewed-by: Baoquan He Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 135 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 53 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8c01f2225ef7..9b2f1b0cac9d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -731,10 +731,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn); static DEFINE_SPINLOCK(free_vmap_area_lock); static bool vmap_initialized __read_mostly; =20 -static struct rb_root purge_vmap_area_root =3D RB_ROOT; -static LIST_HEAD(purge_vmap_area_list); -static DEFINE_SPINLOCK(purge_vmap_area_lock); - /* * This kmem_cache is used for vmap_area objects. Instead of * allocating from slab we reuse an object from this cache to @@ -782,6 +778,12 @@ struct rb_list { static struct vmap_node { /* Bookkeeping data of this node. */ struct rb_list busy; + struct rb_list lazy; + + /* + * Ready-to-free areas. + */ + struct list_head purge_list; } single; =20 static struct vmap_node *vmap_nodes =3D &single; @@ -1766,40 +1768,22 @@ static DEFINE_MUTEX(vmap_purge_lock); =20 /* for per-CPU blocks */ static void purge_fragmented_blocks_allcpus(void); +static cpumask_t purge_nodes; =20 /* * Purges all lazily-freed vmap areas. 
*/ -static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) +static unsigned long +purge_vmap_node(struct vmap_node *vn) { - unsigned long resched_threshold; - unsigned int num_purged_areas =3D 0; - struct list_head local_purge_list; + unsigned long num_purged_areas =3D 0; struct vmap_area *va, *n_va; =20 - lockdep_assert_held(&vmap_purge_lock); - - spin_lock(&purge_vmap_area_lock); - purge_vmap_area_root =3D RB_ROOT; - list_replace_init(&purge_vmap_area_list, &local_purge_list); - spin_unlock(&purge_vmap_area_lock); - - if (unlikely(list_empty(&local_purge_list))) - goto out; - - start =3D min(start, - list_first_entry(&local_purge_list, - struct vmap_area, list)->va_start); - - end =3D max(end, - list_last_entry(&local_purge_list, - struct vmap_area, list)->va_end); - - flush_tlb_kernel_range(start, end); - resched_threshold =3D lazy_max_pages() << 1; + if (list_empty(&vn->purge_list)) + return 0; =20 spin_lock(&free_vmap_area_lock); - list_for_each_entry_safe(va, n_va, &local_purge_list, list) { + list_for_each_entry_safe(va, n_va, &vn->purge_list, list) { unsigned long nr =3D (va->va_end - va->va_start) >> PAGE_SHIFT; unsigned long orig_start =3D va->va_start; unsigned long orig_end =3D va->va_end; @@ -1821,13 +1805,55 @@ static bool __purge_vmap_area_lazy(unsigned long st= art, unsigned long end) =20 atomic_long_sub(nr, &vmap_lazy_nr); num_purged_areas++; - - if (atomic_long_read(&vmap_lazy_nr) < resched_threshold) - cond_resched_lock(&free_vmap_area_lock); } spin_unlock(&free_vmap_area_lock); =20 -out: + return num_purged_areas; +} + +/* + * Purges all lazily-freed vmap areas. + */ +static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) +{ + unsigned long num_purged_areas =3D 0; + struct vmap_node *vn; + int i; + + lockdep_assert_held(&vmap_purge_lock); + purge_nodes =3D CPU_MASK_NONE; + + for (i =3D 0; i < nr_vmap_nodes; i++) { + vn =3D &vmap_nodes[i]; + + INIT_LIST_HEAD(&vn->purge_list); + + if (RB_EMPTY_ROOT(&vn->lazy.root)) + continue; + + spin_lock(&vn->lazy.lock); + WRITE_ONCE(vn->lazy.root.rb_node, NULL); + list_replace_init(&vn->lazy.head, &vn->purge_list); + spin_unlock(&vn->lazy.lock); + + start =3D min(start, list_first_entry(&vn->purge_list, + struct vmap_area, list)->va_start); + + end =3D max(end, list_last_entry(&vn->purge_list, + struct vmap_area, list)->va_end); + + cpumask_set_cpu(i, &purge_nodes); + } + + if (cpumask_weight(&purge_nodes) > 0) { + flush_tlb_kernel_range(start, end); + + for_each_cpu(i, &purge_nodes) { + vn =3D &nodes[i]; + num_purged_areas +=3D purge_vmap_node(vn); + } + } + trace_purge_vmap_area_lazy(start, end, num_purged_areas); return num_purged_areas > 0; } @@ -1846,16 +1872,9 @@ static void reclaim_and_purge_vmap_areas(void) =20 static void drain_vmap_area_work(struct work_struct *work) { - unsigned long nr_lazy; - - do { - mutex_lock(&vmap_purge_lock); - __purge_vmap_area_lazy(ULONG_MAX, 0); - mutex_unlock(&vmap_purge_lock); - - /* Recheck if further work is required. 
*/ - nr_lazy =3D atomic_long_read(&vmap_lazy_nr); - } while (nr_lazy > lazy_max_pages()); + mutex_lock(&vmap_purge_lock); + __purge_vmap_area_lazy(ULONG_MAX, 0); + mutex_unlock(&vmap_purge_lock); } =20 /* @@ -1865,6 +1884,7 @@ static void drain_vmap_area_work(struct work_struct *= work) */ static void free_vmap_area_noflush(struct vmap_area *va) { + struct vmap_node *vn =3D addr_to_node(va->va_start); unsigned long nr_lazy_max =3D lazy_max_pages(); unsigned long va_start =3D va->va_start; unsigned long nr_lazy; @@ -1878,10 +1898,9 @@ static void free_vmap_area_noflush(struct vmap_area = *va) /* * Merge or place it to the purge tree/list. */ - spin_lock(&purge_vmap_area_lock); - merge_or_add_vmap_area(va, - &purge_vmap_area_root, &purge_vmap_area_list); - spin_unlock(&purge_vmap_area_lock); + spin_lock(&vn->lazy.lock); + merge_or_add_vmap_area(va, &vn->lazy.root, &vn->lazy.head); + spin_unlock(&vn->lazy.lock); =20 trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max); =20 @@ -4411,15 +4430,21 @@ static void show_numa_info(struct seq_file *m, stru= ct vm_struct *v) =20 static void show_purge_info(struct seq_file *m) { + struct vmap_node *vn; struct vmap_area *va; + int i; =20 - spin_lock(&purge_vmap_area_lock); - list_for_each_entry(va, &purge_vmap_area_list, list) { - seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", - (void *)va->va_start, (void *)va->va_end, - va->va_end - va->va_start); + for (i =3D 0; i < nr_vmap_nodes; i++) { + vn =3D &vmap_nodes[i]; + + spin_lock(&vn->lazy.lock); + list_for_each_entry(va, &vn->lazy.head, list) { + seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", + (void *)va->va_start, (void *)va->va_end, + va->va_end - va->va_start); + } + spin_unlock(&vn->lazy.lock); } - spin_unlock(&purge_vmap_area_lock); } =20 static int s_show(struct seq_file *m, void *p) @@ -4558,6 +4583,10 @@ static void vmap_init_nodes(void) vn->busy.root =3D RB_ROOT; INIT_LIST_HEAD(&vn->busy.head); spin_lock_init(&vn->busy.lock); + + vn->lazy.root =3D RB_ROOT; + INIT_LIST_HEAD(&vn->lazy.head); + spin_lock_init(&vn->lazy.lock); } } =20 --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 Received: from mail-lf1-f42.google.com (mail-lf1-f42.google.com [209.85.167.42]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B1EB1171C5 for ; Tue, 2 Jan 2024 18:47:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="NrcehluU" Received: by mail-lf1-f42.google.com with SMTP id 2adb3069b0e04-50e80d14404so5335212e87.1 for ; Tue, 02 Jan 2024 10:47:15 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1704221234; x=1704826034; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=h07asMUPPPIr10RgfLD/fjXiYN2b5qLhuka70E2DoO0=; b=NrcehluUmDNrdeY+LnObMqSFBbTD1qO4PZRuaY0r2VoS7EiFs1dvxVT+N6nJASywoV DlMD5oi1/ZXNnVZEDAkTqwIRzvdh92nEdnaHE16Uy0nhrkHcB9Y2/1MYvC/KDVCT6f0H 6MVYkULJtzrP2jUbdC89skk7tYyaEiaKRmGGg/yRhI8Is4OBnn50FlC3D27QWKZ2zVYR 6YboGPhpbS3W1jbQ9wKpKbJCbURlCLRDx7ZEzW471BRPXufrl1FS71xu4/JeT3+FVL6V 3FUB/w5dFbUVI3FcVQaqHZ9xmY11DseKH+ZokInJEHtl9q/x/J0JcM4o/Kan/CRZCh29 Wvgw== 
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1704221234; x=1704826034; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=h07asMUPPPIr10RgfLD/fjXiYN2b5qLhuka70E2DoO0=; b=w98pzS/KpIjIyFZdM+f3+FUz/h1kZwcuh5rbEvNgOElCrW0ciAbn5v606mtl6DguCl w9YuOluFDhlXJclT/YzGOctdHzCt4qnHTXxZnWWyp4wPkkZxJmifJdDK9ASxqBS0mo2c WzbywA3yWrs86k5a8E13wxQg/YVDX98pi3azSNRRrSHKU0eCjCxwHyxwMAnMMnzJ3BWQ Mx1aDGoMzyc8a8ixN99AQWhpRsSQXwXqxT19G8ueaLEePI4c25Ec9D9cNn6MypSvfN6v j9xN99o6kUFdHGuc/IcMEYdYafYgdGhAHHjyohnwXcaoONpES8AOWb1JdgsyeHn4vbsh u94g== X-Gm-Message-State: AOJu0YxBhuHmuL2ztG2fW1aD73ISXI7gXGLYNHRTMlQRh+VM043sM3Rl pS9GwYfkkgdCx/0h81+Duuo= X-Google-Smtp-Source: AGHT+IECQYn33SzuSoJtEpL4Qq9+JrKnIGLpOJF6cxZlu1nvYsQ8EYyOApu4rMADQk0y798LtEw2JQ== X-Received: by 2002:ac2:5975:0:b0:50e:765b:1ea3 with SMTP id h21-20020ac25975000000b0050e765b1ea3mr6543728lfp.22.1704221203595; Tue, 02 Jan 2024 10:46:43 -0800 (PST) Received: from pc638.lan (host-185-121-47-193.sydskane.nu. [185.121.47.193]) by smtp.gmail.com with ESMTPSA id q1-20020ac246e1000000b0050e7be886d9sm2592656lfo.56.2024.01.02.10.46.42 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 02 Jan 2024 10:46:43 -0800 (PST) From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E . McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 07/11] mm: vmalloc: Offload free_vmap_area_lock lock Date: Tue, 2 Jan 2024 19:46:29 +0100 Message-Id: <20240102184633.748113-8-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Concurrent access to a global vmap space is a bottle-neck. We can simulate a high contention by running a vmalloc test suite. To address it, introduce an effective vmap node logic. Each node behaves as independent entity. When a node is accessed it serves a request directly(if possible) from its pool. This model has a size based pool for requests, i.e. pools are serialized and populated based on object size and real demand. A maximum object size that pool can handle is set to 256 pages. This technique reduces a pressure on the global vmap lock. Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 387 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 342 insertions(+), 45 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9b2f1b0cac9d..fa4ab2bbbc5b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -775,7 +775,22 @@ struct rb_list { spinlock_t lock; }; =20 +struct vmap_pool { + struct list_head head; + unsigned long len; +}; + +/* + * A fast size storage contains VAs up to 1M size. + */ +#define MAX_VA_SIZE_PAGES 256 + static struct vmap_node { + /* Simple size segregated storage. */ + struct vmap_pool pool[MAX_VA_SIZE_PAGES]; + spinlock_t pool_lock; + bool skip_populate; + /* Bookkeeping data of this node. */ struct rb_list busy; struct rb_list lazy; @@ -784,6 +799,8 @@ static struct vmap_node { * Ready-to-free areas. 
*/ struct list_head purge_list; + struct work_struct purge_work; + unsigned long nr_purged; } single; =20 static struct vmap_node *vmap_nodes =3D &single; @@ -802,6 +819,61 @@ addr_to_node(unsigned long addr) return &vmap_nodes[addr_to_node_id(addr)]; } =20 +static inline struct vmap_node * +id_to_node(unsigned int id) +{ + return &vmap_nodes[id % nr_vmap_nodes]; +} + +/* + * We use the value 0 to represent "no node", that is why + * an encoded value will be the node-id incremented by 1. + * It is always greater then 0. A valid node_id which can + * be encoded is [0:nr_vmap_nodes - 1]. If a passed node_id + * is not valid 0 is returned. + */ +static unsigned int +encode_vn_id(unsigned int node_id) +{ + /* Can store U8_MAX [0:254] nodes. */ + if (node_id < nr_vmap_nodes) + return (node_id + 1) << BITS_PER_BYTE; + + /* Warn and no node encoded. */ + WARN_ONCE(1, "Encode wrong node id (%u)\n", node_id); + return 0; +} + +/* + * Returns an encoded node-id, the valid range is within + * [0:nr_vmap_nodes-1] values. Otherwise nr_vmap_nodes is + * returned if extracted data is wrong. + */ +static unsigned int +decode_vn_id(unsigned int val) +{ + unsigned int node_id =3D (val >> BITS_PER_BYTE) - 1; + + /* Can store U8_MAX [0:254] nodes. */ + if (node_id < nr_vmap_nodes) + return node_id; + + /* If it was _not_ zero, warn. */ + WARN_ONCE(node_id !=3D UINT_MAX, + "Decode wrong node id (%d)\n", node_id); + + return nr_vmap_nodes; +} + +static bool +is_vn_id_valid(unsigned int node_id) +{ + if (node_id < nr_vmap_nodes) + return true; + + return false; +} + static __always_inline unsigned long va_size(struct vmap_area *va) { @@ -1623,6 +1695,104 @@ preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_m= ask, int node) kmem_cache_free(vmap_area_cachep, va); } =20 +static struct vmap_pool * +size_to_va_pool(struct vmap_node *vn, unsigned long size) +{ + unsigned int idx =3D (size - 1) / PAGE_SIZE; + + if (idx < MAX_VA_SIZE_PAGES) + return &vn->pool[idx]; + + return NULL; +} + +static bool +node_pool_add_va(struct vmap_node *n, struct vmap_area *va) +{ + struct vmap_pool *vp; + + vp =3D size_to_va_pool(n, va_size(va)); + if (!vp) + return false; + + spin_lock(&n->pool_lock); + list_add(&va->list, &vp->head); + WRITE_ONCE(vp->len, vp->len + 1); + spin_unlock(&n->pool_lock); + + return true; +} + +static struct vmap_area * +node_pool_del_va(struct vmap_node *vn, unsigned long size, + unsigned long align, unsigned long vstart, + unsigned long vend) +{ + struct vmap_area *va =3D NULL; + struct vmap_pool *vp; + int err =3D 0; + + vp =3D size_to_va_pool(vn, size); + if (!vp || list_empty(&vp->head)) + return NULL; + + spin_lock(&vn->pool_lock); + if (!list_empty(&vp->head)) { + va =3D list_first_entry(&vp->head, struct vmap_area, list); + + if (IS_ALIGNED(va->va_start, align)) { + /* + * Do some sanity check and emit a warning + * if one of below checks detects an error. 
+ */ + err |=3D (va_size(va) !=3D size); + err |=3D (va->va_start < vstart); + err |=3D (va->va_end > vend); + + if (!WARN_ON_ONCE(err)) { + list_del_init(&va->list); + WRITE_ONCE(vp->len, vp->len - 1); + } else { + va =3D NULL; + } + } else { + list_move_tail(&va->list, &vp->head); + va =3D NULL; + } + } + spin_unlock(&vn->pool_lock); + + return va; +} + +static struct vmap_area * +node_alloc(unsigned long size, unsigned long align, + unsigned long vstart, unsigned long vend, + unsigned long *addr, unsigned int *vn_id) +{ + struct vmap_area *va; + + *vn_id =3D 0; + *addr =3D vend; + + /* + * Fallback to a global heap if not vmalloc or there + * is only one node. + */ + if (vstart !=3D VMALLOC_START || vend !=3D VMALLOC_END || + nr_vmap_nodes =3D=3D 1) + return NULL; + + *vn_id =3D raw_smp_processor_id() % nr_vmap_nodes; + va =3D node_pool_del_va(id_to_node(*vn_id), size, align, vstart, vend); + *vn_id =3D encode_vn_id(*vn_id); + + if (va) + *addr =3D va->va_start; + + return va; +} + /* * Allocate a region of KVA of the specified size and alignment, within the * vstart and vend. @@ -1637,6 +1807,7 @@ static struct vmap_area *alloc_vmap_area(unsigned lon= g size, struct vmap_area *va; unsigned long freed; unsigned long addr; + unsigned int vn_id; int purged =3D 0; int ret; =20 @@ -1647,11 +1818,23 @@ static struct vmap_area *alloc_vmap_area(unsigned l= ong size, return ERR_PTR(-EBUSY); =20 might_sleep(); - gfp_mask =3D gfp_mask & GFP_RECLAIM_MASK; =20 - va =3D kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); - if (unlikely(!va)) - return ERR_PTR(-ENOMEM); + /* + * If a VA is obtained from a global heap(if it fails here) + * it is anyway marked with this "vn_id" so it is returned + * to this pool's node later. Such way gives a possibility + * to populate pools based on users demand. + * + * On success a ready to go VA is returned. + */ + va =3D node_alloc(size, align, vstart, vend, &addr, &vn_id); + if (!va) { + gfp_mask =3D gfp_mask & GFP_RECLAIM_MASK; + + va =3D kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); + if (unlikely(!va)) + return ERR_PTR(-ENOMEM); + } =20 /* * Only scan the relevant parts containing pointers to other objects @@ -1660,10 +1843,12 @@ static struct vmap_area *alloc_vmap_area(unsigned l= ong size, kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); =20 retry: - preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node); - addr =3D __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list, - size, align, vstart, vend); - spin_unlock(&free_vmap_area_lock); + if (addr =3D=3D vend) { + preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node); + addr =3D __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list, + size, align, vstart, vend); + spin_unlock(&free_vmap_area_lock); + } =20 trace_alloc_vmap_area(addr, size, align, vstart, vend, addr =3D=3D vend); =20 @@ -1677,7 +1862,7 @@ static struct vmap_area *alloc_vmap_area(unsigned lon= g size, va->va_start =3D addr; va->va_end =3D addr + size; va->vm =3D NULL; - va->flags =3D va_flags; + va->flags =3D (va_flags | vn_id); =20 vn =3D addr_to_node(va->va_start); =20 @@ -1770,63 +1955,135 @@ static DEFINE_MUTEX(vmap_purge_lock); static void purge_fragmented_blocks_allcpus(void); static cpumask_t purge_nodes; =20 -/* - * Purges all lazily-freed vmap areas. 
- */ -static unsigned long -purge_vmap_node(struct vmap_node *vn) +static void +reclaim_list_global(struct list_head *head) { - unsigned long num_purged_areas =3D 0; - struct vmap_area *va, *n_va; + struct vmap_area *va, *n; =20 - if (list_empty(&vn->purge_list)) - return 0; + if (list_empty(head)) + return; =20 spin_lock(&free_vmap_area_lock); + list_for_each_entry_safe(va, n, head, list) + merge_or_add_vmap_area_augment(va, + &free_vmap_area_root, &free_vmap_area_list); + spin_unlock(&free_vmap_area_lock); +} + +static void +decay_va_pool_node(struct vmap_node *vn, bool full_decay) +{ + struct vmap_area *va, *nva; + struct list_head decay_list; + struct rb_root decay_root; + unsigned long n_decay; + int i; + + decay_root =3D RB_ROOT; + INIT_LIST_HEAD(&decay_list); + + for (i =3D 0; i < MAX_VA_SIZE_PAGES; i++) { + struct list_head tmp_list; + + if (list_empty(&vn->pool[i].head)) + continue; + + INIT_LIST_HEAD(&tmp_list); + + /* Detach the pool, so no-one can access it. */ + spin_lock(&vn->pool_lock); + list_replace_init(&vn->pool[i].head, &tmp_list); + spin_unlock(&vn->pool_lock); + + if (full_decay) + WRITE_ONCE(vn->pool[i].len, 0); + + /* Decay a pool by ~25% out of left objects. */ + n_decay =3D vn->pool[i].len >> 2; + + list_for_each_entry_safe(va, nva, &tmp_list, list) { + list_del_init(&va->list); + merge_or_add_vmap_area(va, &decay_root, &decay_list); + + if (!full_decay) { + WRITE_ONCE(vn->pool[i].len, vn->pool[i].len - 1); + + if (!--n_decay) + break; + } + } + + /* Attach the pool back if it has been partly decayed. */ + if (!full_decay && !list_empty(&tmp_list)) { + spin_lock(&vn->pool_lock); + list_replace_init(&tmp_list, &vn->pool[i].head); + spin_unlock(&vn->pool_lock); + } + } + + reclaim_list_global(&decay_list); +} + +static void purge_vmap_node(struct work_struct *work) +{ + struct vmap_node *vn =3D container_of(work, + struct vmap_node, purge_work); + struct vmap_area *va, *n_va; + LIST_HEAD(local_list); + + vn->nr_purged =3D 0; + list_for_each_entry_safe(va, n_va, &vn->purge_list, list) { unsigned long nr =3D (va->va_end - va->va_start) >> PAGE_SHIFT; unsigned long orig_start =3D va->va_start; unsigned long orig_end =3D va->va_end; + unsigned int vn_id =3D decode_vn_id(va->flags); =20 - /* - * Finally insert or merge lazily-freed area. It is - * detached and there is no need to "unlink" it from - * anything. - */ - va =3D merge_or_add_vmap_area_augment(va, &free_vmap_area_root, - &free_vmap_area_list); - - if (!va) - continue; + list_del_init(&va->list); =20 if (is_vmalloc_or_module_addr((void *)orig_start)) kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end); =20 atomic_long_sub(nr, &vmap_lazy_nr); - num_purged_areas++; + vn->nr_purged++; + + if (is_vn_id_valid(vn_id) && !vn->skip_populate) + if (node_pool_add_va(vn, va)) + continue; + + /* Go back to global. */ + list_add(&va->list, &local_list); } - spin_unlock(&free_vmap_area_lock); =20 - return num_purged_areas; + reclaim_list_global(&local_list); } =20 /* * Purges all lazily-freed vmap areas. */ -static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) +static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end, + bool full_pool_decay) { - unsigned long num_purged_areas =3D 0; + unsigned long nr_purged_areas =3D 0; + unsigned int nr_purge_helpers; + unsigned int nr_purge_nodes; struct vmap_node *vn; int i; =20 lockdep_assert_held(&vmap_purge_lock); + + /* + * Use cpumask to mark which node has to be processed. 
+ */ purge_nodes =3D CPU_MASK_NONE; =20 for (i =3D 0; i < nr_vmap_nodes; i++) { vn =3D &vmap_nodes[i]; =20 INIT_LIST_HEAD(&vn->purge_list); + vn->skip_populate =3D full_pool_decay; + decay_va_pool_node(vn, full_pool_decay); =20 if (RB_EMPTY_ROOT(&vn->lazy.root)) continue; @@ -1845,17 +2102,45 @@ static bool __purge_vmap_area_lazy(unsigned long st= art, unsigned long end) cpumask_set_cpu(i, &purge_nodes); } =20 - if (cpumask_weight(&purge_nodes) > 0) { + nr_purge_nodes =3D cpumask_weight(&purge_nodes); + if (nr_purge_nodes > 0) { flush_tlb_kernel_range(start, end); =20 + /* One extra worker is per a lazy_max_pages() full set minus one. */ + nr_purge_helpers =3D atomic_long_read(&vmap_lazy_nr) / lazy_max_pages(); + nr_purge_helpers =3D clamp(nr_purge_helpers, 1U, nr_purge_nodes) - 1; + for_each_cpu(i, &purge_nodes) { - vn =3D &nodes[i]; - num_purged_areas +=3D purge_vmap_node(vn); + vn =3D &vmap_nodes[i]; + + if (nr_purge_helpers > 0) { + INIT_WORK(&vn->purge_work, purge_vmap_node); + + if (cpumask_test_cpu(i, cpu_online_mask)) + schedule_work_on(i, &vn->purge_work); + else + schedule_work(&vn->purge_work); + + nr_purge_helpers--; + } else { + vn->purge_work.func =3D NULL; + purge_vmap_node(&vn->purge_work); + nr_purged_areas +=3D vn->nr_purged; + } + } + + for_each_cpu(i, &purge_nodes) { + vn =3D &vmap_nodes[i]; + + if (vn->purge_work.func) { + flush_work(&vn->purge_work); + nr_purged_areas +=3D vn->nr_purged; + } } } =20 - trace_purge_vmap_area_lazy(start, end, num_purged_areas); - return num_purged_areas > 0; + trace_purge_vmap_area_lazy(start, end, nr_purged_areas); + return nr_purged_areas > 0; } =20 /* @@ -1866,14 +2151,14 @@ static void reclaim_and_purge_vmap_areas(void) { mutex_lock(&vmap_purge_lock); purge_fragmented_blocks_allcpus(); - __purge_vmap_area_lazy(ULONG_MAX, 0); + __purge_vmap_area_lazy(ULONG_MAX, 0, true); mutex_unlock(&vmap_purge_lock); } =20 static void drain_vmap_area_work(struct work_struct *work) { mutex_lock(&vmap_purge_lock); - __purge_vmap_area_lazy(ULONG_MAX, 0); + __purge_vmap_area_lazy(ULONG_MAX, 0, false); mutex_unlock(&vmap_purge_lock); } =20 @@ -1884,9 +2169,10 @@ static void drain_vmap_area_work(struct work_struct = *work) */ static void free_vmap_area_noflush(struct vmap_area *va) { - struct vmap_node *vn =3D addr_to_node(va->va_start); unsigned long nr_lazy_max =3D lazy_max_pages(); unsigned long va_start =3D va->va_start; + unsigned int vn_id =3D decode_vn_id(va->flags); + struct vmap_node *vn; unsigned long nr_lazy; =20 if (WARN_ON_ONCE(!list_empty(&va->list))) @@ -1896,10 +2182,14 @@ static void free_vmap_area_noflush(struct vmap_area= *va) PAGE_SHIFT, &vmap_lazy_nr); =20 /* - * Merge or place it to the purge tree/list. + * If it was request by a certain node we would like to + * return it to that node, i.e. its pool for later reuse. */ + vn =3D is_vn_id_valid(vn_id) ? 
+ id_to_node(vn_id):addr_to_node(va->va_start); + spin_lock(&vn->lazy.lock); - merge_or_add_vmap_area(va, &vn->lazy.root, &vn->lazy.head); + insert_vmap_area(va, &vn->lazy.root, &vn->lazy.head); spin_unlock(&vn->lazy.lock); =20 trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max); @@ -2408,7 +2698,7 @@ static void _vm_unmap_aliases(unsigned long start, un= signed long end, int flush) } free_purged_blocks(&purge_list); =20 - if (!__purge_vmap_area_lazy(start, end) && flush) + if (!__purge_vmap_area_lazy(start, end, false) && flush) flush_tlb_kernel_range(start, end); mutex_unlock(&vmap_purge_lock); } @@ -4576,7 +4866,7 @@ static void vmap_init_free_space(void) static void vmap_init_nodes(void) { struct vmap_node *vn; - int i; + int i, j; =20 for (i =3D 0; i < nr_vmap_nodes; i++) { vn =3D &vmap_nodes[i]; @@ -4587,6 +4877,13 @@ static void vmap_init_nodes(void) vn->lazy.root =3D RB_ROOT; INIT_LIST_HEAD(&vn->lazy.head); spin_lock_init(&vn->lazy.lock); + + for (j =3D 0; j < MAX_VA_SIZE_PAGES; j++) { + INIT_LIST_HEAD(&vn->pool[j].head); + WRITE_ONCE(vn->pool[j].len, 0); + } + + spin_lock_init(&vn->pool_lock); } } =20 --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 Received: from mail-lf1-f42.google.com (mail-lf1-f42.google.com [209.85.167.42]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8DFA7171AB for ; Tue, 2 Jan 2024 18:47:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="FY6pwNo1" Received: by mail-lf1-f42.google.com with SMTP id 2adb3069b0e04-50e7e55c0f6so6464776e87.0 for ; Tue, 02 Jan 2024 10:47:16 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1704221235; x=1704826035; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=QZAvt7CkxqIe5cgK0fSSW+F0xApr58BOh4APGf1P6Lc=; b=FY6pwNo1H8WB+3cPzzwrc4urNP/9S3hQMHbQzgGC+zilS+ybIAbRohqzbwW1+8Kpo+ mWnB5z+cwIpvFA+R5Klm2bOtiLWocJO713gWJ+L12TJakTfozai0erqNoPPSpL09gLzz NohyRAclPYHbNEax6rwrCaPZfDFoHebuQ23zTUAzqn+Sk0BsbHNg+OsK+OFEI10TJjw9 TnP4oSvU7oMVeslq1ZXjk6ge6tpjwjT5NkkuvP6D6XuD2UxgYBdF5uu+ZyWhkjXl12hf 8ePwPoYaD8qOs9SrAkUb5l7xBeS1RHCk5otyUVg+20kKgG/FX6nSQDSr0z+po2/GRDgd UErw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1704221235; x=1704826035; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=QZAvt7CkxqIe5cgK0fSSW+F0xApr58BOh4APGf1P6Lc=; b=pELx/uhrkylsAVql7LAuPk/bgLIFWXbJtzpTtFgPocVE7cSPpkx+1koWZ2O3RpZgZt sNdEcU5fESXHgfYNCY0vCtK83E1pYlgItkoq4pnCb5zjngy28UOBGoIlAIi2XMTzF5Al zOQ9ow6XrTloB0cRVITeCV5N6WX8VFeBuS2geK+ngMT2s2nezfIHeFiaHOyQmLYEV3iP dHzUviWeEYmki4he+S6+5Q1tySRTHcx7B/YiqknLzQjwm1nqyOrGCEg7cVBJUwxMdTM5 aYiAc+ry4YG6/BLJmk5H7lQ58HcvZPtwXUUW31hgSteL6H0O4gciF/LsqapAA63LMLBo ao+g== X-Gm-Message-State: AOJu0YzmNwkr3RBea6CTzOG6mHWJMAKYawex+4dxr7WE1/i1f8p+OO93 sZ4bRgRjjE9CAQMiPGyoT3JXXJ1P67kcVQ== X-Google-Smtp-Source: AGHT+IG2+hyU4AcsC7fH/iKMYGpwIo7u9xULgZSVpY4T2CGFn4/x55AJtPvIKYTpeIpe7us9O7g2tw== X-Received: by 2002:ac2:520b:0:b0:50e:9a53:c22c 
with SMTP id a11-20020ac2520b000000b0050e9a53c22cmr794413lfl.126.1704221234559; Tue, 02 Jan 2024 10:47:14 -0800 (PST) Received: from pc638.lan (host-185-121-47-193.sydskane.nu. [185.121.47.193]) by smtp.gmail.com with ESMTPSA id q1-20020ac246e1000000b0050e7be886d9sm2592656lfo.56.2024.01.02.10.47.13 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 02 Jan 2024 10:47:14 -0800 (PST) From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E . McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 08/11] mm: vmalloc: Support multiple nodes in vread_iter Date: Tue, 2 Jan 2024 19:46:30 +0100 Message-Id: <20240102184633.748113-9-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Extend the vread_iter() to be able to perform a sequential reading of VAs which are spread among multiple nodes. So a data read over the /dev/kmem correctly reflects a vmalloc memory layout. Reviewed-by: Baoquan He Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 67 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index fa4ab2bbbc5b..594ed003d44d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -906,7 +906,7 @@ unsigned long vmalloc_nr_pages(void) =20 /* Look up the first VA which satisfies addr < va_end, NULL if none. */ static struct vmap_area * -find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) +__find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) { struct vmap_area *va =3D NULL; struct rb_node *n =3D root->rb_node; @@ -930,6 +930,41 @@ find_vmap_area_exceed_addr(unsigned long addr, struct = rb_root *root) return va; } =20 +/* + * Returns a node where a first VA, that satisfies addr < va_end, resides. + * If success, a node is locked. A user is responsible to unlock it when a + * VA is no longer needed to be accessed. + * + * Returns NULL if nothing found. + */ +static struct vmap_node * +find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va) +{ + struct vmap_node *vn, *va_node =3D NULL; + struct vmap_area *va_lowest; + int i; + + for (i =3D 0; i < nr_vmap_nodes; i++) { + vn =3D &vmap_nodes[i]; + + spin_lock(&vn->busy.lock); + va_lowest =3D __find_vmap_area_exceed_addr(addr, &vn->busy.root); + if (va_lowest) { + if (!va_node || va_lowest->va_start < (*va)->va_start) { + if (va_node) + spin_unlock(&va_node->busy.lock); + + *va =3D va_lowest; + va_node =3D vn; + continue; + } + } + spin_unlock(&vn->busy.lock); + } + + return va_node; +} + static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_ro= ot *root) { struct rb_node *n =3D root->rb_node; @@ -4102,6 +4137,7 @@ long vread_iter(struct iov_iter *iter, const char *ad= dr, size_t count) struct vm_struct *vm; char *vaddr; size_t n, size, flags, remains; + unsigned long next; =20 addr =3D kasan_reset_tag(addr); =20 @@ -4111,19 +4147,15 @@ long vread_iter(struct iov_iter *iter, const char *= addr, size_t count) =20 remains =3D count; =20 - /* Hooked to node_0 so far. 
*/ - vn =3D addr_to_node(0); - spin_lock(&vn->busy.lock); - - va =3D find_vmap_area_exceed_addr((unsigned long)addr, &vn->busy.root); - if (!va) + vn =3D find_vmap_area_exceed_addr_lock((unsigned long) addr, &va); + if (!vn) goto finished_zero; =20 /* no intersects with alive vmap_area */ if ((unsigned long)addr + remains <=3D va->va_start) goto finished_zero; =20 - list_for_each_entry_from(va, &vn->busy.head, list) { + do { size_t copied; =20 if (remains =3D=3D 0) @@ -4138,10 +4170,10 @@ long vread_iter(struct iov_iter *iter, const char *= addr, size_t count) WARN_ON(flags =3D=3D VMAP_BLOCK); =20 if (!vm && !flags) - continue; + goto next_va; =20 if (vm && (vm->flags & VM_UNINITIALIZED)) - continue; + goto next_va; =20 /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ smp_rmb(); @@ -4150,7 +4182,7 @@ long vread_iter(struct iov_iter *iter, const char *ad= dr, size_t count) size =3D vm ? get_vm_area_size(vm) : va_size(va); =20 if (addr >=3D vaddr + size) - continue; + goto next_va; =20 if (addr < vaddr) { size_t to_zero =3D min_t(size_t, vaddr - addr, remains); @@ -4179,15 +4211,22 @@ long vread_iter(struct iov_iter *iter, const char *= addr, size_t count) =20 if (copied !=3D n) goto finished; - } + + next_va: + next =3D va->va_end; + spin_unlock(&vn->busy.lock); + } while ((vn =3D find_vmap_area_exceed_addr_lock(next, &va))); =20 finished_zero: - spin_unlock(&vn->busy.lock); + if (vn) + spin_unlock(&vn->busy.lock); + /* zero-fill memory holes */ return count - remains + zero_iter(iter, remains); finished: /* Nothing remains, or We couldn't copy/zero everything. */ - spin_unlock(&vn->busy.lock); + if (vn) + spin_unlock(&vn->busy.lock); =20 return count - remains; } --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 Received: from mail-lf1-f50.google.com (mail-lf1-f50.google.com [209.85.167.50]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 85C26171C8 for ; Tue, 2 Jan 2024 18:47:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="Pz0PdIuB" Received: by mail-lf1-f50.google.com with SMTP id 2adb3069b0e04-50ea226bda8so574963e87.2 for ; Tue, 02 Jan 2024 10:47:17 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1704221235; x=1704826035; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=cMaatDyVMtexg7cRa9eaEuAm/eowYR09uugeWui4AYQ=; b=Pz0PdIuBjcbRTVOympFkruZ117s0tB/fGYZCU5oIHYXArAd3mvmxGFtUJQ10vPEUNR ccwQ7VZnHK/rlaNIft/CMY3QsK0A8LwxT14Vi9Q9Azg8e5mtR7RyArQNmEIhkTxy5Ksq 9YQlINWpGmdh4A8gxgs5w6d2oRJV69lJhfaEhwqY1E7iqgmTFpMLp0FX1U/NptFNm/sl Kh+EsLdclrnU/NbI83C0B8HKaNvIdDxgSLsJF7HXpdz++eeLthv9Hgjxers0/5NRYjdM SG7DREt4zpMHyNmPLOoaSWPdFs/XO1J3YhvvfFSRkXT5d9+ROUypgsiUmdJ56elPrQIA aRAQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1704221235; x=1704826035; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=cMaatDyVMtexg7cRa9eaEuAm/eowYR09uugeWui4AYQ=; 
b=kiQ3kPhpYoSkZXjDaw3QCqS5KbiPC2XZf5OdbKuUY3zbeC+uqWjUBh2i6agfW92Vei yCbFFHo5GsqCoMYyWdY7hqPH6wEXpU1cl4iKffaVFD790Ijywgz/Bl6DdgyjMCnpUMdc rMjUPjvk5m2S7d2ohIPemhv5Wi+OoSolGWBu/M6Q61tJlooflUqV8bV1ZXjyr1EXCTL5 KYEHGYEOLcqSLNUHI+XLrudae3HnHH68dGP24eHR1deOptTrXgdnzzPv6Lvj0RJJKjPj tJF6SJa9B4cSr6yzzAWC6Ir1wjgcSxZDwOyDpF7s3rXSrQzCoaRzNluW1H65guLP4eNN 1I9Q== X-Gm-Message-State: AOJu0YyTzO5i8hg5mCc2FyhDmVSb6pD37MXaN39GPa3LDwjHGw9VBHeW hSJXpz4gE3jSe22p6HsTmz0= X-Google-Smtp-Source: AGHT+IHL4Wmy2w8EjWfxM2Ec6BVxhpJGK0/FEzXHtHxP7HAyn5/e0Ve12sc1nQgdeQE352aAG5a4ag== X-Received: by 2002:a05:6512:3d07:b0:50e:73ac:a179 with SMTP id d7-20020a0565123d0700b0050e73aca179mr7225341lfv.91.1704221235497; Tue, 02 Jan 2024 10:47:15 -0800 (PST) Received: from pc638.lan (host-185-121-47-193.sydskane.nu. [185.121.47.193]) by smtp.gmail.com with ESMTPSA id q1-20020ac246e1000000b0050e7be886d9sm2592656lfo.56.2024.01.02.10.47.14 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 02 Jan 2024 10:47:15 -0800 (PST) From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E . McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 09/11] mm: vmalloc: Support multiple nodes in vmallocinfo Date: Tue, 2 Jan 2024 19:46:31 +0100 Message-Id: <20240102184633.748113-10-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Allocated areas are spread among nodes, it implies that the scanning has to be performed individually of each node in order to dump all existing VAs. 
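
For illustration only (an editor's sketch, not part of the diff that
follows), the per-node scan introduced here boils down to the loop
below; the vmap_nodes, nr_vmap_nodes and busy rb_list names are the
ones used by the patch itself:

	struct vmap_node *vn;
	struct vmap_area *va;
	int i;

	for (i = 0; i < nr_vmap_nodes; i++) {
		vn = &vmap_nodes[i];

		spin_lock(&vn->busy.lock);
		list_for_each_entry(va, &vn->busy.head, list) {
			/* Dump one busy VA that belongs to this node. */
		}
		spin_unlock(&vn->busy.lock);
	}

Only one busy.lock is held at a time, so dumping the information of
one node does not block allocations that target the other nodes.
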
Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 120 ++++++++++++++++++++------------------------------- 1 file changed, 47 insertions(+), 73 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 594ed003d44d..0c671cb96151 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4709,30 +4709,6 @@ bool vmalloc_dump_obj(void *object) #endif =20 #ifdef CONFIG_PROC_FS -static void *s_start(struct seq_file *m, loff_t *pos) -{ - struct vmap_node *vn =3D addr_to_node(0); - - mutex_lock(&vmap_purge_lock); - spin_lock(&vn->busy.lock); - - return seq_list_start(&vn->busy.head, *pos); -} - -static void *s_next(struct seq_file *m, void *p, loff_t *pos) -{ - struct vmap_node *vn =3D addr_to_node(0); - return seq_list_next(p, &vn->busy.head, pos); -} - -static void s_stop(struct seq_file *m, void *p) -{ - struct vmap_node *vn =3D addr_to_node(0); - - spin_unlock(&vn->busy.lock); - mutex_unlock(&vmap_purge_lock); -} - static void show_numa_info(struct seq_file *m, struct vm_struct *v) { if (IS_ENABLED(CONFIG_NUMA)) { @@ -4776,84 +4752,82 @@ static void show_purge_info(struct seq_file *m) } } =20 -static int s_show(struct seq_file *m, void *p) +static int vmalloc_info_show(struct seq_file *m, void *p) { struct vmap_node *vn; struct vmap_area *va; struct vm_struct *v; + int i; =20 - vn =3D addr_to_node(0); - va =3D list_entry(p, struct vmap_area, list); + for (i =3D 0; i < nr_vmap_nodes; i++) { + vn =3D &vmap_nodes[i]; =20 - if (!va->vm) { - if (va->flags & VMAP_RAM) - seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", - (void *)va->va_start, (void *)va->va_end, - va->va_end - va->va_start); + spin_lock(&vn->busy.lock); + list_for_each_entry(va, &vn->busy.head, list) { + if (!va->vm) { + if (va->flags & VMAP_RAM) + seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", + (void *)va->va_start, (void *)va->va_end, + va->va_end - va->va_start); =20 - goto final; - } + continue; + } =20 - v =3D va->vm; + v =3D va->vm; =20 - seq_printf(m, "0x%pK-0x%pK %7ld", - v->addr, v->addr + v->size, v->size); + seq_printf(m, "0x%pK-0x%pK %7ld", + v->addr, v->addr + v->size, v->size); =20 - if (v->caller) - seq_printf(m, " %pS", v->caller); + if (v->caller) + seq_printf(m, " %pS", v->caller); =20 - if (v->nr_pages) - seq_printf(m, " pages=3D%d", v->nr_pages); + if (v->nr_pages) + seq_printf(m, " pages=3D%d", v->nr_pages); =20 - if (v->phys_addr) - seq_printf(m, " phys=3D%pa", &v->phys_addr); + if (v->phys_addr) + seq_printf(m, " phys=3D%pa", &v->phys_addr); =20 - if (v->flags & VM_IOREMAP) - seq_puts(m, " ioremap"); + if (v->flags & VM_IOREMAP) + seq_puts(m, " ioremap"); =20 - if (v->flags & VM_ALLOC) - seq_puts(m, " vmalloc"); + if (v->flags & VM_ALLOC) + seq_puts(m, " vmalloc"); =20 - if (v->flags & VM_MAP) - seq_puts(m, " vmap"); + if (v->flags & VM_MAP) + seq_puts(m, " vmap"); =20 - if (v->flags & VM_USERMAP) - seq_puts(m, " user"); + if (v->flags & VM_USERMAP) + seq_puts(m, " user"); =20 - if (v->flags & VM_DMA_COHERENT) - seq_puts(m, " dma-coherent"); + if (v->flags & VM_DMA_COHERENT) + seq_puts(m, " dma-coherent"); =20 - if (is_vmalloc_addr(v->pages)) - seq_puts(m, " vpages"); + if (is_vmalloc_addr(v->pages)) + seq_puts(m, " vpages"); =20 - show_numa_info(m, v); - seq_putc(m, '\n'); + show_numa_info(m, v); + seq_putc(m, '\n'); + } + spin_unlock(&vn->busy.lock); + } =20 /* * As a final step, dump "unpurged" areas. 
*/ -final: - if (list_is_last(&va->list, &vn->busy.head)) - show_purge_info(m); - + show_purge_info(m); return 0; } =20 -static const struct seq_operations vmalloc_op =3D { - .start =3D s_start, - .next =3D s_next, - .stop =3D s_stop, - .show =3D s_show, -}; - static int __init proc_vmalloc_init(void) { + void *priv_data =3D NULL; + if (IS_ENABLED(CONFIG_NUMA)) - proc_create_seq_private("vmallocinfo", 0400, NULL, - &vmalloc_op, - nr_node_ids * sizeof(unsigned int), NULL); - else - proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op); + priv_data =3D kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); + + proc_create_single_data("vmallocinfo", + 0400, NULL, vmalloc_info_show, priv_data); + return 0; } module_init(proc_vmalloc_init); --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 Received: from mail-lf1-f43.google.com (mail-lf1-f43.google.com [209.85.167.43]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC716171D7 for ; Tue, 2 Jan 2024 18:47:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="mbWj9Eqr" Received: by mail-lf1-f43.google.com with SMTP id 2adb3069b0e04-50e8ca6c76dso4334342e87.3 for ; Tue, 02 Jan 2024 10:47:18 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1704221237; x=1704826037; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=36KamwtZGiOJ+iCEiFn7yiYoyZmI7LhL5vNG1kBAh/Y=; b=mbWj9EqrC4HIMlsTfAsHEcbfwNCgTFrjYtO5o6KeiT7Cf+zUTUFJoAWwuTBFtR+TJJ KqIhYe1ydLVim3Dv6eFxyLlTq9Cc6BGrVYvpyBxJZib7ospyLZSDppFuer2cMB1bLKeI ZQw6aeOkgy2kK7c+lFTqFpWR5AXdRL+d7Pbj7rR9Ky63sMzDszrR6aNSVDZWgVvqN9YX FiFLuNz9TCh+UqfDTcahYbCgJbuk096HtB4dvFR2Vluz3gTh6Ce4WbvmK8uaOM9KjMBs Ss6djungNmtXXKzVRWBs75W5byPFaUAOljj5dmg8pd8rBFPHHf/HZTaIcvyCRAQudzUo y6fg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1704221237; x=1704826037; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=36KamwtZGiOJ+iCEiFn7yiYoyZmI7LhL5vNG1kBAh/Y=; b=LhZnG8iccmzhl02s3103AAT3gIhiV+hcWGzH0Q8vBRNY/4aLgBAvhhIsWJxKjShx/z oeBfj/a3jnIPv03S1C40qLY6PgUpLfr1YdC0Lqi31sYNa82pDDWBePfOliZHy5oXXVGd vCrRu0hQ8P5NCOCJnXQDufrbw6IgBb0Btp0vkHBHrAFUb3UTjYRY5RcjVusisUotuiJj IPBpDb2NZkNwqPA4MT+OzpBLUlnDisu0FkGh8fjLeiUn4Pna4jMLhbOJg+aO/VANw9GC W+fjsR8o1cuIw81t8V0T2fznw2jLLxWnUHN7LYyJT48ou/ApbvwJvZWzpdd1zeiz454V HCmQ== X-Gm-Message-State: AOJu0YxQORKfiTfv3AjWgKYZJ/Cw74bfF+K561fkAzVGKLkJUr/Zrk3P EFC54b3Zm/lIYzSLogLPRs8= X-Google-Smtp-Source: AGHT+IFi7xrYzx6xhsiq08yLfVLl/kXBKY0Xd49T3HSM7WNrgq2+gVmjHodohn+UrhXHc14kRcbU4A== X-Received: by 2002:ac2:5d67:0:b0:50e:383b:19bd with SMTP id h7-20020ac25d67000000b0050e383b19bdmr6365437lft.102.1704221236664; Tue, 02 Jan 2024 10:47:16 -0800 (PST) Received: from pc638.lan (host-185-121-47-193.sydskane.nu. 
[185.121.47.193]) by smtp.gmail.com with ESMTPSA id q1-20020ac246e1000000b0050e7be886d9sm2592656lfo.56.2024.01.02.10.47.15 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 02 Jan 2024 10:47:16 -0800 (PST) From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E . McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 10/11] mm: vmalloc: Set nr_nodes based on CPUs in a system Date: Tue, 2 Jan 2024 19:46:32 +0100 Message-Id: <20240102184633.748113-11-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" A number of nodes which are used in the alloc/free paths is set based on num_possible_cpus() in a system. Please note a high limit threshold though is fixed and corresponds to 128 nodes. For 32-bit or single core systems an access to a global vmap heap is not balanced. Such small systems do not suffer from lock contentions due to low number of CPUs. In such case the nr_nodes is equal to 1. Test on AMD Ryzen Threadripper 3970X 32-Core Processor: sudo ./test_vmalloc.sh run_test_mask=3D7 nr_threads=3D64 94.41% 0.89% [kernel] [k] _raw_spin_lock 93.35% 93.07% [kernel] [k] native_queued_spin_lock_slowpath 76.13% 0.28% [kernel] [k] __vmalloc_node_range 72.96% 0.81% [kernel] [k] alloc_vmap_area 56.94% 0.00% [kernel] [k] __get_vm_area_node 41.95% 0.00% [kernel] [k] vmalloc 37.15% 0.01% [test_vmalloc] [k] full_fit_alloc_test 35.17% 0.00% [kernel] [k] ret_from_fork_asm 35.17% 0.00% [kernel] [k] ret_from_fork 35.17% 0.00% [kernel] [k] kthread 35.08% 0.00% [test_vmalloc] [k] test_func 34.45% 0.00% [test_vmalloc] [k] fix_size_alloc_test 28.09% 0.01% [test_vmalloc] [k] long_busy_list_alloc_test 23.53% 0.25% [kernel] [k] vfree.part.0 21.72% 0.00% [kernel] [k] remove_vm_area 20.08% 0.21% [kernel] [k] find_unlink_vmap_area 2.34% 0.61% [kernel] [k] free_vmap_area_noflush vs 82.32% 0.22% [test_vmalloc] [k] long_busy_list_alloc_test 63.36% 0.02% [kernel] [k] vmalloc 63.34% 2.64% [kernel] [k] __vmalloc_node_range 30.42% 4.46% [kernel] [k] vfree.part.0 28.98% 2.51% [kernel] [k] __alloc_pages_bulk 27.28% 0.19% [kernel] [k] __get_vm_area_node 26.13% 1.50% [kernel] [k] alloc_vmap_area 21.72% 21.67% [kernel] [k] clear_page_rep 19.51% 2.43% [kernel] [k] _raw_spin_lock 16.61% 16.51% [kernel] [k] native_queued_spin_lock_slowpath 13.40% 2.07% [kernel] [k] free_unref_page 10.62% 0.01% [kernel] [k] remove_vm_area 9.02% 8.73% [kernel] [k] insert_vmap_area 8.94% 0.00% [kernel] [k] ret_from_fork_asm 8.94% 0.00% [kernel] [k] ret_from_fork 8.94% 0.00% [kernel] [k] kthread 8.29% 0.00% [test_vmalloc] [k] test_func 7.81% 0.05% [test_vmalloc] [k] full_fit_alloc_test 5.30% 4.73% [kernel] [k] purge_vmap_node 4.47% 2.65% [kernel] [k] free_vmap_area_noflush confirms that a native_queued_spin_lock_slowpath goes down to 16.51% percent from 93.07%. The throughput is ~12x higher: urezki@pc638:~$ time sudo ./test_vmalloc.sh run_test_mask=3D7 nr_threads=3D= 64 Run the test with following parameters: run_test_mask=3D7 nr_threads=3D64 Done. Check the kernel ring buffer to see the summary. 
real 10m51.271s user 0m0.013s sys 0m0.187s urezki@pc638:~$ urezki@pc638:~$ time sudo ./test_vmalloc.sh run_test_mask=3D7 nr_threads=3D= 64 Run the test with following parameters: run_test_mask=3D7 nr_threads=3D64 Done. Check the kernel ring buffer to see the summary. real 0m51.301s user 0m0.015s sys 0m0.040s urezki@pc638:~$ Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0c671cb96151..ef534c76daef 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4879,10 +4879,27 @@ static void vmap_init_free_space(void) static void vmap_init_nodes(void) { struct vmap_node *vn; - int i, j; + int i, n; + +#if BITS_PER_LONG =3D=3D 64 + /* A high threshold of max nodes is fixed and bound to 128. */ + n =3D clamp_t(unsigned int, num_possible_cpus(), 1, 128); + + if (n > 1) { + vn =3D kmalloc_array(n, sizeof(*vn), GFP_NOWAIT | __GFP_NOWARN); + if (vn) { + /* Node partition is 16 pages. */ + vmap_zone_size =3D (1 << 4) * PAGE_SIZE; + nr_vmap_nodes =3D n; + vmap_nodes =3D vn; + } else { + pr_err("Failed to allocate an array. Disable a node layer\n"); + } + } +#endif =20 - for (i =3D 0; i < nr_vmap_nodes; i++) { - vn =3D &vmap_nodes[i]; + for (n =3D 0; n < nr_vmap_nodes; n++) { + vn =3D &vmap_nodes[n]; vn->busy.root =3D RB_ROOT; INIT_LIST_HEAD(&vn->busy.head); spin_lock_init(&vn->busy.lock); @@ -4891,9 +4908,9 @@ static void vmap_init_nodes(void) INIT_LIST_HEAD(&vn->lazy.head); spin_lock_init(&vn->lazy.lock); =20 - for (j =3D 0; j < MAX_VA_SIZE_PAGES; j++) { - INIT_LIST_HEAD(&vn->pool[j].head); - WRITE_ONCE(vn->pool[j].len, 0); + for (i =3D 0; i < MAX_VA_SIZE_PAGES; i++) { + INIT_LIST_HEAD(&vn->pool[i].head); + WRITE_ONCE(vn->pool[i].len, 0); } =20 spin_lock_init(&vn->pool_lock); --=20 2.39.2 From nobody Fri Dec 26 19:23:15 2025 Received: from mail-lf1-f42.google.com (mail-lf1-f42.google.com [209.85.167.42]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9FB861772F for ; Tue, 2 Jan 2024 18:47:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=gmail.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com header.b="QqyNJBaF" Received: by mail-lf1-f42.google.com with SMTP id 2adb3069b0e04-50e7be1c65dso6281129e87.3 for ; Tue, 02 Jan 2024 10:47:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1704221238; x=1704826038; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=s9/T5K2ir2OJL/EqmsqvZQ3IpaLcsXDlXwLSIHXyRwY=; b=QqyNJBaF17QA4zJWhIzv8GdhcGwy4f1bXtAZCGdk6P7kYdAqHjznYBUttM5nINPnlV GKP1dQVag6KVzs8rClS3ibWcFkC3rGcDaY0TiylGhmIZ9m/4jTN5E3IA94/RQRBGPTwQ e4+q9qSauuA2k6oQf2ZyO750QcK53xwVq2rpf1BzGEUDkJMVHLtFA9xhQmJmKe5AIUha MbHQ8x+yLbXUBZtYFi6tr9/D1wDtFhWrC0lTLiDu5aLD7oeeJqRyB7L+CxvcWMKMNNit xnHuDZjlawjvFqPu6fldmU0o4GNjHjerZUWFrpJJ0d7kCAZhPvHXtH5gmLoGIocDc3Sx wV2A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1704221238; x=1704826038; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc 
:subject:date:message-id:reply-to; bh=s9/T5K2ir2OJL/EqmsqvZQ3IpaLcsXDlXwLSIHXyRwY=; b=eL9X3sdiw+2PRaxxiga9v6t8jBAcD23Mw1NuYPegMLI8E0pRZ7XPTIqvyoTwUG5exv r615S7va/GgYLvaEs6TCnGtvmEABmnKE8mVyewDXhyGUpKMbPRb+Ta8OwVhu1+fiehW+ oVH/TnBU1ca7uzfBjC8/jl3k56gzO/xKUDnuG1eu6IxkFtEbB4Y/36bn+aow00jmFnPv cpLAo/REq3qKgWpYnoTeUQAMN05N4jl+IFv5Ya90EdpLULbra1OC/bB5sm0Za8YOepBd j1AcrPagFgQwXWVDWqI6tluHVnlmtTBcUb3UTVNK4eJLOsWJM7tiEPoNpe6YBqe0GcJL 2CfQ== X-Gm-Message-State: AOJu0Yz13Es96AYP+CdVEqSmQc6P6VsfFLABt/0qZzSOoZiXi2AQy2qj JgFRgVMddKkMeAI9X0fQqwQ= X-Google-Smtp-Source: AGHT+IHRbxitAW0kY1SyrAVZNdrjS0E3Q6aU8WIW/EO9WbSw23NrRoV+q5pHLLcc7N5Pm8J6+ROD3A== X-Received: by 2002:ac2:5fae:0:b0:50e:5f99:21f2 with SMTP id s14-20020ac25fae000000b0050e5f9921f2mr6671072lfe.37.1704221237681; Tue, 02 Jan 2024 10:47:17 -0800 (PST) Received: from pc638.lan (host-185-121-47-193.sydskane.nu. [185.121.47.193]) by smtp.gmail.com with ESMTPSA id q1-20020ac246e1000000b0050e7be886d9sm2592656lfo.56.2024.01.02.10.47.16 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 02 Jan 2024 10:47:17 -0800 (PST) From: "Uladzislau Rezki (Sony)" To: linux-mm@kvack.org, Andrew Morton Cc: LKML , Baoquan He , Lorenzo Stoakes , Christoph Hellwig , Matthew Wilcox , "Liam R . Howlett" , Dave Chinner , "Paul E . McKenney" , Joel Fernandes , Uladzislau Rezki , Oleksiy Avramchenko Subject: [PATCH v3 11/11] mm: vmalloc: Add a shrinker to drain vmap pools Date: Tue, 2 Jan 2024 19:46:33 +0100 Message-Id: <20240102184633.748113-12-urezki@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240102184633.748113-1-urezki@gmail.com> References: <20240102184633.748113-1-urezki@gmail.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The added shrinker is used to return back current cached VAs into a global vmap space, when a system enters into a low memory mode. Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ef534c76daef..e30dabf68263 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4917,8 +4917,37 @@ static void vmap_init_nodes(void) } } =20 +static unsigned long +vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long count; + struct vmap_node *vn; + int i, j; + + for (count =3D 0, i =3D 0; i < nr_vmap_nodes; i++) { + vn =3D &vmap_nodes[i]; + + for (j =3D 0; j < MAX_VA_SIZE_PAGES; j++) + count +=3D READ_ONCE(vn->pool[j].len); + } + + return count ? count : SHRINK_EMPTY; +} + +static unsigned long +vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + int i; + + for (i =3D 0; i < nr_vmap_nodes; i++) + decay_va_pool_node(&vmap_nodes[i], true); + + return SHRINK_STOP; +} + void __init vmalloc_init(void) { + struct shrinker *vmap_node_shrinker; struct vmap_area *va; struct vmap_node *vn; struct vm_struct *tmp; @@ -4966,4 +4995,14 @@ void __init vmalloc_init(void) */ vmap_init_free_space(); vmap_initialized =3D true; + + vmap_node_shrinker =3D shrinker_alloc(0, "vmap-node"); + if (!vmap_node_shrinker) { + pr_err("Failed to allocate vmap-node shrinker!\n"); + return; + } + + vmap_node_shrinker->count_objects =3D vmap_node_shrink_count; + vmap_node_shrinker->scan_objects =3D vmap_node_shrink_scan; + shrinker_register(vmap_node_shrinker); } --=20 2.39.2
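
[Editor's note, not part of the series] A hypothetical snippet showing
the kind of workload the per-node pools and the new "vmap-node"
shrinker are aimed at: repeated same-size allocations that are freed
and requested again on the same CPU. All names prefixed with
pool_demo_ are made up for this sketch; vmalloc(), vfree() and the
module macros are the regular kernel APIs:

	#include <linux/module.h>
	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	static int __init pool_demo_init(void)
	{
		void *p;
		int i;

		/*
		 * Same-size allocations, freed and re-requested. Once a
		 * lazily freed area of this size has been purged, it is
		 * cached in the issuing node's pool, so later requests of
		 * the same size can be served without taking the global
		 * free_vmap_area_lock.
		 */
		for (i = 0; i < 1000; i++) {
			/* 16 pages, well under the 256-page pool limit. */
			p = vmalloc(16 * PAGE_SIZE);
			if (!p)
				return -ENOMEM;
			vfree(p);
		}

		return 0;
	}
	module_init(pool_demo_init);

	static void __exit pool_demo_exit(void)
	{
	}
	module_exit(pool_demo_exit);

	MODULE_LICENSE("GPL");

Pools only cache VAs up to MAX_VA_SIZE_PAGES (256 pages, i.e. 1M);
under memory pressure the registered "vmap-node" shrinker decays the
cached entries back into the global free vmap space.
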