From nobody Mon Feb 9 11:26:53 2026 Received: from out-180.mta1.migadu.com (out-180.mta1.migadu.com [95.215.58.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 923C91EB1BC; Sun, 20 Apr 2025 15:59:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745164770; cv=none; b=nB5/J+LCIOL1s1hVVQAezENYwv7EmMKx4h6PtJFvprPV61E6oiPOkDNPEQ9klZDu14cZV+tU/LoizOhcN6sGKsZBjSqcqT05zQhnxdMYUfgCU0D8pTbusjGU8uyJEB7nul3eKT9UX+IZeJyjSE4EgEsyNJmoaFWdFuUaTtn0428= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745164770; c=relaxed/simple; bh=93kECPvpfHLvi3qDs25Onj8GT4ggPIYfsb3O6TfKzmA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=RXXSUGBr1zrAyPWi0x0jm1MyAd9RDq01F+IEm+eNcdeyL5EZllXSx3aul0q+Yg24WSXG9xYDHR/DAzSO6X2EhSq9TkHaa0Y8KAE/1MXUWVyrYmb6PCIBA2apNmmye7RvZqkp52oHWNg4b51V31a8HN3xiHiNF5AzyHsnzLmVDmA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=kUIZORBi; arc=none smtp.client-ip=95.215.58.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="kUIZORBi" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1745164765; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=l0cdqSd9eqgxjb0DRh6oAeCxxzKsmrIexabjD/n3lQo=; b=kUIZORBiJnEt2TOQoAnwsjOq5Q+olLJUhaeywBijv98iOmQRgBiOgNO9kMa6Kzmx4oW7+C t05IJILh8t9qiiaG9MNza1cLGXQf8Ou3vL8YXEaeBry6HACjkwAQw9D/kxoygb2D1npktU KRew8Hnvo9P9k8r3CecDDkwIbBRbNSA= From: Kent Overstreet To: linux-bcachefs@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org Cc: Kent Overstreet Subject: [PATCH 1/3] bcachefs: enumerated_ref.c Date: Sun, 20 Apr 2025 11:59:14 -0400 Message-ID: <20250420155918.749455-2-kent.overstreet@linux.dev> In-Reply-To: <20250420155918.749455-1-kent.overstreet@linux.dev> References: <20250420155918.749455-1-kent.overstreet@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Factor out the debug code for rw filesystem refs into a small library. In release mode an enumerated ref is a normal percpu refcount, but in debug mode all enumerated users of the ref get their own atomic_long_t ref - making it much easier to chase down refcount usage bugs for when a refcount has many users. For debugging, we have enumerated_ref_to_text(), which prints the current value of each different user. Additionally, in debug mode enumerated_ref_stop() has a 10 second timeout, after which it will dump outstanding refcounts. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/enumerated_ref.c | 144 +++++++++++++++++++++++++++++ fs/bcachefs/enumerated_ref.h | 54 +++++++++++ fs/bcachefs/enumerated_ref_types.h | 19 ++++ 4 files changed, 218 insertions(+) create mode 100644 fs/bcachefs/enumerated_ref.c create mode 100644 fs/bcachefs/enumerated_ref.h create mode 100644 fs/bcachefs/enumerated_ref_types.h diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index baf859bf83bb..3be39845e4f6 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -35,6 +35,7 @@ bcachefs-y :=3D \ disk_accounting.o \ disk_groups.o \ ec.o \ + enumerated_ref.o \ errcode.o \ error.o \ extents.o \ diff --git a/fs/bcachefs/enumerated_ref.c b/fs/bcachefs/enumerated_ref.c new file mode 100644 index 000000000000..56ab430f209f --- /dev/null +++ b/fs/bcachefs/enumerated_ref.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "enumerated_ref.h" +#include "util.h" + +#include + +#ifdef ENUMERATED_REF_DEBUG +void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx) +{ + BUG_ON(idx >=3D ref->nr); + atomic_long_inc(&ref->refs[idx]); +} + +bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx) +{ + BUG_ON(idx >=3D ref->nr); + return atomic_long_inc_not_zero(&ref->refs[idx]); +} + +bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx) +{ + BUG_ON(idx >=3D ref->nr); + return !ref->dying && + atomic_long_inc_not_zero(&ref->refs[idx]); +} + +void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx) +{ + BUG_ON(idx >=3D ref->nr); + long v =3D atomic_long_dec_return(&ref->refs[idx]); + + BUG_ON(v < 0); + if (v) + return; + + for (unsigned i =3D 0; i < ref->nr; i++) + if (atomic_long_read(&ref->refs[i])) + return; + + if (ref->stop_fn) + ref->stop_fn(ref); + complete(&ref->stop_complete); +} +#endif + +#ifndef ENUMERATED_REF_DEBUG +static void enumerated_ref_kill_cb(struct percpu_ref *percpu_ref) +{ + struct 
enumerated_ref *ref =3D + container_of(percpu_ref, struct enumerated_ref, ref); + + if (ref->stop_fn) + ref->stop_fn(ref); + complete(&ref->stop_complete); +} +#endif + +void enumerated_ref_stop_async(struct enumerated_ref *ref) +{ + reinit_completion(&ref->stop_complete); + +#ifndef ENUMERATED_REF_DEBUG + percpu_ref_kill(&ref->ref); +#else + ref->dying =3D true; + for (unsigned i =3D 0; i < ref->nr; i++) + enumerated_ref_put(ref, i); +#endif +} + +void enumerated_ref_stop(struct enumerated_ref *ref, + const char * const names[]) +{ + enumerated_ref_stop_async(ref); + while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) { + struct printbuf buf =3D PRINTBUF; + + prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n"); + prt_str(&buf, "Outstanding refs:\n"); + enumerated_ref_to_text(&buf, ref, names); + printk(KERN_ERR "%s", buf.buf); + printbuf_exit(&buf); + } +} + +void enumerated_ref_start(struct enumerated_ref *ref) +{ +#ifndef ENUMERATED_REF_DEBUG + percpu_ref_reinit(&ref->ref); +#else + ref->dying =3D false; + for (unsigned i =3D 0; i < ref->nr; i++) { + BUG_ON(atomic_long_read(&ref->refs[i])); + atomic_long_inc(&ref->refs[i]); + } +#endif +} + +void enumerated_ref_exit(struct enumerated_ref *ref) +{ +#ifndef ENUMERATED_REF_DEBUG + percpu_ref_exit(&ref->ref); +#else + kfree(ref->refs); + ref->refs =3D NULL; + ref->nr =3D 0; +#endif +} + +int enumerated_ref_init(struct enumerated_ref *ref, unsigned nr, + void (*stop_fn)(struct enumerated_ref *)) +{ + init_completion(&ref->stop_complete); + ref->stop_fn =3D stop_fn; + +#ifndef ENUMERATED_REF_DEBUG + return percpu_ref_init(&ref->ref, enumerated_ref_kill_cb, + PERCPU_REF_INIT_DEAD, GFP_KERNEL); +#else + ref->refs =3D kzalloc(sizeof(ref->refs[0]) * nr, GFP_KERNEL); + if (!ref->refs) + return -ENOMEM; + + ref->nr =3D nr; + return 0; +#endif +} + +void enumerated_ref_to_text(struct printbuf *out, + struct enumerated_ref *ref, + const char * const names[]) +{ +#ifdef ENUMERATED_REF_DEBUG + 
bch2_printbuf_tabstop_push(out, 32); + + for (unsigned i =3D 0; i < ref->nr; i++) + prt_printf(out, "%s\t%li\n", names[i], + atomic_long_read(&ref->refs[i])); +#else + prt_str(out, "(not in debug mode)\n"); +#endif +} diff --git a/fs/bcachefs/enumerated_ref.h b/fs/bcachefs/enumerated_ref.h new file mode 100644 index 000000000000..6d2283cf298d --- /dev/null +++ b/fs/bcachefs/enumerated_ref.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ENUMERATED_REF_H +#define _BCACHEFS_ENUMERATED_REF_H + +#include "enumerated_ref_types.h" + +/* + * A refcount where the users are enumerated: in debug mode, we create sep= ate + * refcounts for each user, to make leaks and refcount errors easy to track + * down: + */ + +#ifdef ENUMERATED_REF_DEBUG +void enumerated_ref_get(struct enumerated_ref *, unsigned); +bool __enumerated_ref_tryget(struct enumerated_ref *, unsigned); +bool enumerated_ref_tryget(struct enumerated_ref *, unsigned); +void enumerated_ref_put(struct enumerated_ref *, unsigned); +#else + +static inline void enumerated_ref_get(struct enumerated_ref *ref, unsigned= idx) +{ + percpu_ref_get(&ref->ref); +} + +static inline bool __enumerated_ref_tryget(struct enumerated_ref *ref, uns= igned idx) +{ + return percpu_ref_tryget(&ref->ref); +} + +static inline bool enumerated_ref_tryget(struct enumerated_ref *ref, unsig= ned idx) +{ + return percpu_ref_tryget_live(&ref->ref); +} + +static inline void enumerated_ref_put(struct enumerated_ref *ref, unsigned= idx) +{ + percpu_ref_put(&ref->ref); +} +#endif + +void enumerated_ref_stop_async(struct enumerated_ref *); +void enumerated_ref_stop(struct enumerated_ref *, const char * const[]); +void enumerated_ref_start(struct enumerated_ref *); + +void enumerated_ref_exit(struct enumerated_ref *); +int enumerated_ref_init(struct enumerated_ref *, unsigned, + void (*stop_fn)(struct enumerated_ref *)); + +struct printbuf; +void enumerated_ref_to_text(struct printbuf *, + struct enumerated_ref *, + const 
char * const[]); + +#endif /* _BCACHEFS_ENUMERATED_REF_H */ diff --git a/fs/bcachefs/enumerated_ref_types.h b/fs/bcachefs/enumerated_re= f_types.h new file mode 100644 index 000000000000..0e6076f466d3 --- /dev/null +++ b/fs/bcachefs/enumerated_ref_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ENUMERATED_REF_TYPES_H +#define _BCACHEFS_ENUMERATED_REF_TYPES_H + +#include + +struct enumerated_ref { +#ifdef ENUMERATED_REF_DEBUG + unsigned nr; + bool dying; + atomic_long_t *refs; +#else + struct percpu_ref ref; +#endif + void (*stop_fn)(struct enumerated_ref *); + struct completion stop_complete; +}; + +#endif /* _BCACHEFS_ENUMERATED_REF_TYPES_H */ --=20 2.49.0 From nobody Mon Feb 9 11:26:53 2026 Received: from out-176.mta1.migadu.com (out-176.mta1.migadu.com [95.215.58.176]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A6DEE20B814 for ; Sun, 20 Apr 2025 15:59:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.176 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745164772; cv=none; b=cyBTRUKi0Cx98Oa5rEaJbQMxxVW+fEulDUHv92tPAjAVMzDQ+gF77ht5z2WumKfXu6hzBOatSLhZv/Cx70MMzA5mquT/tl7bpXdfuOzTQ3vdC9gYogOmBnS5gaxcs+BIHelbDJmHAm/X5NKwgsctQgPGzRVad83iclP/aaXCuOY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745164772; c=relaxed/simple; bh=YJTA2WCu73KiUaMXC8PXQPYN2Tfrij0nfx8H+WdxTWc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ME0mVktiWlnAZF3+x++U7WFj1dFrPFsUJRAKn+UcRSlzSDLnzoZDwew9vmUO7CdTMG/3M31528QdkgNbxU6MTiZ6zv5+mqUaTtgg0zfJM+JBMH+MJX/Hd9p0f65RYrkEZzpbEE7CtGA12/SjRY+pJL8X7MDuZQUAee3B1Y8Nwas= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev 
header.i=@linux.dev header.b=noHw/2Mz; arc=none smtp.client-ip=95.215.58.176 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="noHw/2Mz" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1745164766; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=0hCKXlrHde8qXSd4LIsPBgn+QzDqDWcN7QcvO+ZGVpU=; b=noHw/2Mzdc9AdTAcY3mVgsoQtjWhoTS+GH22kclRbBTTJCFb7iLW47rSwjoaFAC8M76uzP pftbApbvDSzYKlK2k05iwCJaCMcDGE2+HQbVXd4oUcLyxb93gxjSe6WMI65Jfv0vDSoDUO D2ODPQXgnB/zlVeI1xO0xjM5244wnyE= From: Kent Overstreet To: linux-bcachefs@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org Cc: Kent Overstreet Subject: [PATCH 2/3] bcachefs: bch_fs.writes -> enumerated_refs Date: Sun, 20 Apr 2025 11:59:15 -0400 Message-ID: <20250420155918.749455-3-kent.overstreet@linux.dev> In-Reply-To: <20250420155918.749455-1-kent.overstreet@linux.dev> References: <20250420155918.749455-1-kent.overstreet@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Drop the single-purpose write ref code in bcachefs.h, and convert to enumerated refs. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 23 ++++++------ fs/bcachefs/bcachefs.h | 57 ++--------------------------- fs/bcachefs/btree_gc.c | 7 ++-- fs/bcachefs/btree_io.c | 7 ++-- fs/bcachefs/btree_trans_commit.c | 5 ++- fs/bcachefs/btree_update_interior.c | 7 ++-- fs/bcachefs/btree_write_buffer.c | 11 +++--- fs/bcachefs/ec.c | 13 ++++--- fs/bcachefs/fs-io-direct.c | 7 ++-- fs/bcachefs/fs-io.c | 9 +++-- fs/bcachefs/io_read.c | 7 ++-- fs/bcachefs/io_write.c | 5 ++- fs/bcachefs/journal.c | 5 ++- fs/bcachefs/reflink.c | 5 ++- fs/bcachefs/snapshot.c | 7 ++-- fs/bcachefs/subvolume.c | 7 ++-- fs/bcachefs/super.c | 32 ++++------------ fs/bcachefs/sysfs.c | 26 +++---------- 18 files changed, 87 insertions(+), 153 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 6156c18b3347..c7e50b1835ed 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -17,6 +17,7 @@ #include "debug.h" #include "disk_accounting.h" #include "ec.h" +#include "enumerated_ref.h" #include "error.h" #include "lru.h" #include "recovery.h" @@ -1381,7 +1382,7 @@ static void check_discard_freespace_key_work(struct w= ork_struct *work) container_of(work, struct check_discard_freespace_key_async, work); =20 bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); - bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key); + enumerated_ref_put(&w->c->writes, BCH_WRITE_REF_check_discard_freespace_k= ey); kfree(w); } =20 @@ -1458,7 +1459,7 @@ int bch2_check_discard_freespace_key(struct btree_tra= ns *trans, struct btree_ite if (!w) goto out; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)= ) { + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_check_discard_frees= pace_key)) { kfree(w); goto out; } @@ -1953,14 +1954,14 @@ static void bch2_do_discards_work(struct work_struc= t *work) bch2_err_str(ret)); =20 percpu_ref_put(&ca->io_ref[WRITE]); - 
bch2_write_ref_put(c, BCH_WRITE_REF_discard); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard); } =20 void bch2_dev_do_discards(struct bch_dev *ca) { struct bch_fs *c =3D ca->fs; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard)) return; =20 if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) @@ -1971,7 +1972,7 @@ void bch2_dev_do_discards(struct bch_dev *ca) =20 percpu_ref_put(&ca->io_ref[WRITE]); put_write_ref: - bch2_write_ref_put(c, BCH_WRITE_REF_discard); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard); } =20 void bch2_do_discards(struct bch_fs *c) @@ -2048,7 +2049,7 @@ static void bch2_do_discards_fast_work(struct work_st= ruct *work) =20 bch2_trans_put(trans); percpu_ref_put(&ca->io_ref[WRITE]); - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast); } =20 static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) @@ -2058,7 +2059,7 @@ static void bch2_discard_one_bucket_fast(struct bch_d= ev *ca, u64 bucket) if (discard_in_flight_add(ca, bucket, false)) return; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard_fast)) return; =20 if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) @@ -2069,7 +2070,7 @@ static void bch2_discard_one_bucket_fast(struct bch_d= ev *ca, u64 bucket) =20 percpu_ref_put(&ca->io_ref[WRITE]); put_ref: - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast); } =20 static int invalidate_one_bp(struct btree_trans *trans, @@ -2263,14 +2264,14 @@ static void bch2_do_invalidates_work(struct work_st= ruct *work) bch2_trans_put(trans); percpu_ref_put(&ca->io_ref[WRITE]); bch2_bkey_buf_exit(&last_flushed, c); - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate); } =20 void bch2_dev_do_invalidates(struct 
bch_dev *ca) { struct bch_fs *c =3D ca->fs; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_invalidate)) return; =20 if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) @@ -2281,7 +2282,7 @@ void bch2_dev_do_invalidates(struct bch_dev *ca) =20 percpu_ref_put(&ca->io_ref[WRITE]); put_ref: - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate); } =20 void bch2_do_invalidates(struct bch_fs *c) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 56a71c7eb256..d00c320fe973 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -220,7 +220,7 @@ #include "util.h" =20 #ifdef CONFIG_BCACHEFS_DEBUG -#define BCH_WRITE_REF_DEBUG +#define ENUMERATED_REF_DEBUG #endif =20 #ifndef dynamic_fault @@ -484,6 +484,7 @@ enum bch_time_stats { #include "clock_types.h" #include "disk_groups_types.h" #include "ec_types.h" +#include "enumerated_ref_types.h" #include "journal_types.h" #include "keylist_types.h" #include "quota_types.h" @@ -734,11 +735,7 @@ struct bch_fs { struct rw_semaphore state_lock; =20 /* Counts outstanding writes, for clean transition to read-only */ -#ifdef BCH_WRITE_REF_DEBUG - atomic_long_t writes[BCH_WRITE_REF_NR]; -#else - struct percpu_ref writes; -#endif + struct enumerated_ref writes; /* * Certain operations are only allowed in single threaded mode, during * recovery, and we want to assert that this is the case: @@ -1123,54 +1120,6 @@ struct bch_fs { =20 extern struct wait_queue_head bch2_read_only_wait; =20 -static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref= ref) -{ -#ifdef BCH_WRITE_REF_DEBUG - atomic_long_inc(&c->writes[ref]); -#else - percpu_ref_get(&c->writes); -#endif -} - -static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_writ= e_ref ref) -{ -#ifdef BCH_WRITE_REF_DEBUG - return !test_bit(BCH_FS_going_ro, &c->flags) && - atomic_long_inc_not_zero(&c->writes[ref]); 
-#else - return percpu_ref_tryget(&c->writes); -#endif -} - -static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_= ref ref) -{ -#ifdef BCH_WRITE_REF_DEBUG - return !test_bit(BCH_FS_going_ro, &c->flags) && - atomic_long_inc_not_zero(&c->writes[ref]); -#else - return percpu_ref_tryget_live(&c->writes); -#endif -} - -static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref= ref) -{ -#ifdef BCH_WRITE_REF_DEBUG - long v =3D atomic_long_dec_return(&c->writes[ref]); - - BUG_ON(v < 0); - if (v) - return; - for (unsigned i =3D 0; i < BCH_WRITE_REF_NR; i++) - if (atomic_long_read(&c->writes[i])) - return; - - set_bit(BCH_FS_write_disable_complete, &c->flags); - wake_up(&bch2_read_only_wait); -#else - percpu_ref_put(&c->writes); -#endif -} - static inline bool bch2_ro_ref_tryget(struct bch_fs *c) { if (test_bit(BCH_FS_stopping, &c->flags)) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2824a6e87712..9db8f5f8745c 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -22,6 +22,7 @@ #include "debug.h" #include "disk_accounting.h" #include "ec.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "journal.h" @@ -1233,14 +1234,14 @@ static void bch2_gc_gens_work(struct work_struct *w= ork) { struct bch_fs *c =3D container_of(work, struct bch_fs, gc_gens_work); bch2_gc_gens(c); - bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens); } =20 void bch2_gc_gens_async(struct bch_fs *c) { - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) && + if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_gc_gens) && !queue_work(c->write_ref_wq, &c->gc_gens_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens); } =20 void bch2_fs_btree_gc_init_early(struct bch_fs *c) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 134d6eca9852..fb20146d78d5 100644 --- 
a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -13,6 +13,7 @@ #include "buckets.h" #include "checksum.h" #include "debug.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "io_write.h" @@ -1935,7 +1936,7 @@ static void btree_node_scrub_work(struct work_struct = *work) btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub-= >buf); percpu_ref_put(&scrub->ca->io_ref[READ]); kfree(scrub); - bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub); } =20 static void btree_node_scrub_endio(struct bio *bio) @@ -1954,7 +1955,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans, =20 struct bch_fs *c =3D trans->c; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_node_scrub)) return -BCH_ERR_erofs_no_writes; =20 struct extent_ptr_decoded pick; @@ -2004,7 +2005,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans, btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf); percpu_ref_put(&ca->io_ref[READ]); err: - bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub); return ret; } =20 diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_com= mit.c index 4297d8b5eddd..cdde769e7da3 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -11,6 +11,7 @@ #include "btree_write_buffer.h" #include "buckets.h" #include "disk_accounting.h" +#include "enumerated_ref.h" #include "errcode.h" #include "error.h" #include "journal.h" @@ -994,7 +995,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsi= gned flags) goto out_reset; =20 if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && - unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { + unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) { if (unlikely(!test_bit(BCH_FS_may_go_rw, 
&c->flags))) ret =3D do_bch2_trans_commit_to_journal_replay(trans); else @@ -1060,7 +1061,7 @@ int __bch2_trans_commit(struct btree_trans *trans, un= signed flags) trace_and_count(c, transaction_commit, trans, _RET_IP_); out: if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw))) - bch2_write_ref_put(c, BCH_WRITE_REF_trans); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans); out_reset: if (!ret) bch2_trans_downgrade(trans); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update= _interior.c index a0ff2dc0aa91..fcca8b01dd62 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -14,6 +14,7 @@ #include "btree_locking.h" #include "buckets.h" #include "clock.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "io_write.h" @@ -2326,7 +2327,7 @@ static void async_btree_node_rewrite_work(struct work= _struct *work) closure_wake_up(&c->btree_node_rewrites_wait); =20 bch2_bkey_buf_exit(&a->key, c); - bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite); kfree(a); } =20 @@ -2348,7 +2349,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, = struct btree *b) =20 spin_lock(&c->btree_node_rewrites_lock); if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay && - bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { + enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { list_add(&a->list, &c->btree_node_rewrites); now =3D true; } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { @@ -2387,7 +2388,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c) if (!a) break; =20 - bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); + enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite); queue_work(c->btree_node_rewrite_worker, &a->work); } } diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buf= fer.c index 68ab48af40f0..0094e4342b69 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ 
b/fs/bcachefs/btree_write_buffer.c @@ -7,6 +7,7 @@ #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "disk_accounting.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "journal.h" @@ -629,11 +630,11 @@ int bch2_btree_write_buffer_tryflush(struct btree_tra= ns *trans) { struct bch_fs *c =3D trans->c; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer)) return -BCH_ERR_erofs_no_writes; =20 int ret =3D bch2_btree_write_buffer_flush_nocheck_rw(trans); - bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer); return ret; } =20 @@ -692,7 +693,7 @@ static void bch2_btree_write_buffer_flush_work(struct w= ork_struct *work) } while (!ret && bch2_btree_write_buffer_should_flush(c)); mutex_unlock(&wb->flushing.lock); =20 - bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer); } =20 static void wb_accounting_sort(struct btree_write_buffer *wb) @@ -821,9 +822,9 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs= *c, struct journal_keys_ bch2_journal_pin_drop(&c->journal, &dst->wb->pin); =20 if (bch2_btree_write_buffer_should_flush(c) && - __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) && + __enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer)= && !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer); =20 if (dst->wb =3D=3D &wb->flushing) mutex_unlock(&wb->flushing.lock); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 42600370ffb0..b4d78c0ca221 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -16,6 +16,7 @@ #include "disk_accounting.h" #include "disk_groups.h" #include "ec.h" +#include "enumerated_ref.h" 
#include "error.h" #include "io_read.h" #include "io_write.h" @@ -1017,14 +1018,14 @@ static void ec_stripe_delete_work(struct work_struc= t *work) BCH_TRANS_COMMIT_no_enospc, ({ ec_stripe_delete(trans, lru_k.k->p.offset); }))); - bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete); } =20 void bch2_do_stripe_deletes(struct bch_fs *c) { - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) && + if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) && !queue_work(c->write_ref_wq, &c->ec_stripe_delete_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete); } =20 /* stripe creation: */ @@ -1418,15 +1419,15 @@ static void ec_stripe_create_work(struct work_struc= t *work) while ((s =3D get_pending_stripe(c))) ec_stripe_create(s); =20 - bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create); } =20 void bch2_ec_do_stripe_creates(struct bch_fs *c) { - bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create); + enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create); =20 if (!queue_work(system_long_wq, &c->ec_stripe_create_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create); } =20 static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_h= ead *h) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 535bc5fcbcc0..1f5154d9676b 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -3,6 +3,7 @@ =20 #include "bcachefs.h" #include "alloc_foreground.h" +#include "enumerated_ref.h" #include "fs.h" #include "fs-io.h" #include "fs-io-direct.h" @@ -401,7 +402,7 @@ static __always_inline long bch2_dio_write_done(struct = dio_write *dio) ret =3D dio->op.error ?: ((long) dio->written << 9); bio_put(&dio->op.wbio.bio); =20 - bch2_write_ref_put(c, 
BCH_WRITE_REF_dio_write); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write); =20 /* inode->i_dio_count is our ref on inode and thus bch_fs */ inode_dio_end(&inode->v); @@ -606,7 +607,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov= _iter *iter) prefetch(&inode->ei_inode); prefetch((void *) &inode->ei_inode + 64); =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_dio_write)) return -EROFS; =20 inode_lock(&inode->v); @@ -675,7 +676,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov= _iter *iter) bio_put(bio); inode_dio_end(&inode->v); err_put_write_ref: - bch2_write_ref_put(c, BCH_WRITE_REF_dio_write); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write); goto out; } =20 diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 409bba396bad..a84b1baf02d8 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -7,6 +7,7 @@ #include "btree_update.h" #include "buckets.h" #include "clock.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "extent_update.h" @@ -205,7 +206,7 @@ static int bch2_flush_inode(struct bch_fs *c, if (c->opts.journal_flush_disabled) return 0; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fsync)) return -EROFS; =20 u64 seq; @@ -213,7 +214,7 @@ static int bch2_flush_inode(struct bch_fs *c, bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: bch2_inode_flush_nocow_writes(c, inode); - bch2_write_ref_put(c, BCH_WRITE_REF_fsync); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync); return ret; } =20 @@ -796,7 +797,7 @@ long bch2_fallocate_dispatch(struct file *file, int mod= e, struct bch_fs *c =3D inode->v.i_sb->s_fs_info; long ret; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fallocate)) return 
-EROFS; =20 inode_lock(&inode->v); @@ -820,7 +821,7 @@ long bch2_fallocate_dispatch(struct file *file, int mod= e, err: bch2_pagecache_block_put(inode); inode_unlock(&inode->v); - bch2_write_ref_put(c, BCH_WRITE_REF_fallocate); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_fallocate); =20 return bch2_err_class(ret); } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 36108b1fcd44..680f4eeea52a 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -17,6 +17,7 @@ #include "data_update.h" #include "disk_groups.h" #include "ec.h" +#include "enumerated_ref.h" #include "error.h" #include "io_read.h" #include "io_misc.h" @@ -162,7 +163,7 @@ static noinline void promote_free(struct bch_read_bio *= rbio) =20 bch2_data_update_exit(&op->write); =20 - bch2_write_ref_put(c, BCH_WRITE_REF_promote); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote); kfree_rcu(op, rcu); } =20 @@ -227,7 +228,7 @@ static struct bch_read_bio *__promote_alloc(struct btre= e_trans *trans, return NULL; } =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_promote)) return ERR_PTR(-BCH_ERR_nopromote_no_writes); =20 struct promote_op *op =3D kzalloc(sizeof(*op), GFP_KERNEL); @@ -278,7 +279,7 @@ static struct bch_read_bio *__promote_alloc(struct btre= e_trans *trans, /* We may have added to the rhashtable and thus need rcu freeing: */ kfree_rcu(op, rcu); err_put: - bch2_write_ref_put(c, BCH_WRITE_REF_promote); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote); return ERR_PTR(ret); } =20 diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 4dabff3ac1be..755169c4e0e0 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -15,6 +15,7 @@ #include "compress.h" #include "debug.h" #include "ec.h" +#include "enumerated_ref.h" #include "error.h" #include "extent_update.h" #include "inode.h" @@ -512,7 +513,7 @@ static void bch2_write_done(struct closure *cl) bch2_disk_reservation_put(c, 
&op->res); =20 if (!(op->flags & BCH_WRITE_move)) - bch2_write_ref_put(c, BCH_WRITE_REF_write); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_write); bch2_keylist_free(&op->insert_keys, op->inline_keys); =20 EBUG_ON(cl->parent); @@ -1660,7 +1661,7 @@ CLOSURE_CALLBACK(bch2_write) } =20 if (!(op->flags & BCH_WRITE_move) && - !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { + !enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_write)) { op->error =3D -BCH_ERR_erofs_no_writes; goto err; } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index e1cd6e8e37cf..e2c95192a577 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -12,6 +12,7 @@ #include "btree_update.h" #include "btree_write_buffer.h" #include "buckets.h" +#include "enumerated_ref.h" #include "error.h" #include "journal.h" #include "journal_io.h" @@ -989,11 +990,11 @@ int bch2_journal_meta(struct journal *j) { struct bch_fs *c =3D container_of(j, struct bch_fs, journal); =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_journal)) return -BCH_ERR_erofs_no_writes; =20 int ret =3D __bch2_journal_meta(j); - bch2_write_ref_put(c, BCH_WRITE_REF_journal); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_journal); return ret; } =20 diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 710178e3da4c..3a13dbcab6ba 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -3,6 +3,7 @@ #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "inode.h" @@ -610,7 +611,7 @@ s64 bch2_remap_range(struct bch_fs *c, !bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_ma= y_update_opts); int ret =3D 0, ret2 =3D 0; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_reflink)) return -BCH_ERR_erofs_no_writes; =20 bch2_check_set_feature(c, BCH_FEATURE_reflink); @@ 
-761,7 +762,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); =20 - bch2_write_ref_put(c, BCH_WRITE_REF_reflink); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_reflink); =20 return dst_done ?: ret ?: ret2; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 2eede851572c..14ea09ccee37 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -6,6 +6,7 @@ #include "btree_key_cache.h" #include "btree_update.h" #include "buckets.h" +#include "enumerated_ref.h" #include "errcode.h" #include "error.h" #include "fs.h" @@ -1661,18 +1662,18 @@ void bch2_delete_dead_snapshots_work(struct work_st= ruct *work) set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name); =20 bch2_delete_dead_snapshots(c); - bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots); } =20 void bch2_delete_dead_snapshots_async(struct bch_fs *c) { - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshot= s)) return; =20 BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); =20 if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots); } =20 int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 1b9fb60c05be..39376c87b4a6 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" #include "btree_update.h" +#include "enumerated_ref.h" #include "errcode.h" #include "error.h" #include "fs.h" @@ -517,7 +518,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delet= e(struct work_struct *wor darray_exit(&s); } =20 - bch2_write_ref_put(c, 
BCH_WRITE_REF_snapshot_delete_pagecache); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache); } =20 struct subvolume_unlink_hook { @@ -540,11 +541,11 @@ static int bch2_subvolume_wait_for_pagecache_and_dele= te_hook(struct btree_trans if (ret) return ret; =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_page= cache)) return -EROFS; =20 if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_dele= te_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache); return 0; } =20 diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 78f683c6b490..288e00779fff 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -28,6 +28,7 @@ #include "disk_accounting.h" #include "disk_groups.h" #include "ec.h" +#include "enumerated_ref.h" #include "errcode.h" #include "error.h" #include "fs.h" @@ -311,15 +312,13 @@ static void __bch2_fs_read_only(struct bch_fs *c) } } =20 -#ifndef BCH_WRITE_REF_DEBUG -static void bch2_writes_disabled(struct percpu_ref *writes) +static void bch2_writes_disabled(struct enumerated_ref *writes) { struct bch_fs *c =3D container_of(writes, struct bch_fs, writes); =20 set_bit(BCH_FS_write_disable_complete, &c->flags); wake_up(&bch2_read_only_wait); } -#endif =20 void bch2_fs_read_only(struct bch_fs *c) { @@ -337,12 +336,7 @@ void bch2_fs_read_only(struct bch_fs *c) * writes will return -EROFS: */ set_bit(BCH_FS_going_ro, &c->flags); -#ifndef BCH_WRITE_REF_DEBUG - percpu_ref_kill(&c->writes); -#else - for (unsigned i =3D 0; i < BCH_WRITE_REF_NR; i++) - bch2_write_ref_put(c, i); -#endif + enumerated_ref_stop_async(&c->writes); =20 /* * If we're not doing an emergency shutdown, we want to wait on @@ -525,14 +519,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool= early) set_bit(BCH_FS_rw, &c->flags); 
set_bit(BCH_FS_was_rw, &c->flags); =20 -#ifndef BCH_WRITE_REF_DEBUG - percpu_ref_reinit(&c->writes); -#else - for (unsigned i =3D 0; i < BCH_WRITE_REF_NR; i++) { - BUG_ON(atomic_long_read(&c->writes[i])); - atomic_long_inc(&c->writes[i]); - } -#endif + enumerated_ref_start(&c->writes); + if (!early) { ret =3D bch2_fs_read_write_late(c); if (ret) @@ -629,9 +617,7 @@ static void __bch2_fs_free(struct bch_fs *c) mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); mempool_exit(&c->fill_iter); -#ifndef BCH_WRITE_REF_DEBUG - percpu_ref_exit(&c->writes); -#endif + enumerated_ref_exit(&c->writes); kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(c->journal_seq_blacklist_table); =20 @@ -978,10 +964,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb,= struct bch_opts opts, =20 if (!(c->btree_read_complete_wq =3D alloc_workqueue("bcachefs_btree_read_= complete", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) || -#ifndef BCH_WRITE_REF_DEBUG - percpu_ref_init(&c->writes, bch2_writes_disabled, - PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -#endif + enumerated_ref_init(&c->writes, BCH_WRITE_REF_NR, + bch2_writes_disabled) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || bioset_init(&c->btree_bio, 1, max(offsetof(struct btree_read_bio, bio), diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 455c6ae9a494..58be32bbd49c 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -25,6 +25,7 @@ #include "disk_accounting.h" #include "disk_groups.h" #include "ec.h" +#include "enumerated_ref.h" #include "inode.h" #include "journal.h" #include "journal_reclaim.h" @@ -177,8 +178,6 @@ read_attribute(btree_reserve_cache); read_attribute(open_buckets); read_attribute(open_buckets_partial); read_attribute(nocow_lock_table); - -#ifdef BCH_WRITE_REF_DEBUG read_attribute(write_refs); =20 static const char * const bch2_write_refs[] =3D { @@ -188,15 +187,6 @@ static const char * const bch2_write_refs[] =3D { NULL }; =20 -static void 
bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c) -{ - bch2_printbuf_tabstop_push(out, 24); - - for (unsigned i =3D 0; i < ARRAY_SIZE(c->writes); i++) - prt_printf(out, "%s\t%li\n", bch2_write_refs[i], atomic_long_read(&c->wr= ites[i])); -} -#endif - read_attribute(internal_uuid); read_attribute(disk_groups); =20 @@ -481,10 +471,8 @@ SHOW(bch2_fs) if (attr =3D=3D &sysfs_moving_ctxts) bch2_fs_moving_ctxts_to_text(out, c); =20 -#ifdef BCH_WRITE_REF_DEBUG if (attr =3D=3D &sysfs_write_refs) - bch2_write_refs_to_text(out, c); -#endif + enumerated_ref_to_text(out, &c->writes, bch2_write_refs); =20 if (attr =3D=3D &sysfs_nocow_lock_table) bch2_nocow_locks_to_text(out, &c->nocow_locks); @@ -517,7 +505,7 @@ STORE(bch2_fs) if (attr =3D=3D &sysfs_trigger_btree_updates) queue_work(c->btree_interior_update_worker, &c->btree_interior_update_wo= rk); =20 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)) + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs)) return -EROFS; =20 if (attr =3D=3D &sysfs_trigger_btree_cache_shrink) { @@ -577,7 +565,7 @@ STORE(bch2_fs) size =3D ret; } #endif - bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs); return size; } SYSFS_OPS(bch2_fs); @@ -670,9 +658,7 @@ struct attribute *bch2_fs_internal_files[] =3D { &sysfs_new_stripes, &sysfs_open_buckets, &sysfs_open_buckets_partial, -#ifdef BCH_WRITE_REF_DEBUG &sysfs_write_refs, -#endif &sysfs_nocow_lock_table, &sysfs_io_timers_read, &sysfs_io_timers_write, @@ -738,7 +724,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, * We don't need to take c->writes for correctness, but it eliminates an * unsightly error message in the dmesg log when we're RO: */ - if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))) + if (unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs))) return -EROFS; =20 char *tmp =3D kstrdup(buf, GFP_KERNEL); @@ -765,7 +751,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, =20 ret =3D size; 
err: - bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs); return ret; } =20 --=20 2.49.0 From nobody Mon Feb 9 11:26:53 2026 Received: from out-176.mta1.migadu.com (out-176.mta1.migadu.com [95.215.58.176]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 60CF320B81B; Sun, 20 Apr 2025 15:59:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.176 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745164772; cv=none; b=fE/ogaId/XRMOBT52OPAyIO+mM3LS82xsYeRqNDURxmSX7PsfMi023KPikfwvOxjdU/ygo0x8qa1sq3cYei4FqNmyMogu3bfxMkMKN4+5sH0WcGQDCgRzfMwWhWxds1F8QBHTxgs+D+zMOZmpV5JA2buKRzq6Lqf6OtD5x9g838= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745164772; c=relaxed/simple; bh=luRxqSNkqDg7iiDhDaiRBlyF0UiesDjcFQl3Q4GUtiM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=C3NylDfuOgXJANeAreoElWXCakrqS0Dw6aRIaxfNWOUL5kX08dI9ooLYnOKjDFxCpAv3wsD8MI7ZXTZXKuIPocbR3Azusb+HSxO4UZ5chihSiOeYkQOFWmleUULrA81+daqIKOX8CYqLztfcsgdbqDcrLxjRY8FlVxXq/M0L+wc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=mhminj2I; arc=none smtp.client-ip=95.215.58.176 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="mhminj2I" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1745164767; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=Cx9DRbM3zBHy5tm6FaIz7q1iDeYYK2z5AiDAN6M/468=; b=mhminj2IX81+OoXJiFWln4aaJoxS8WR/1wlgn9QixwR/OEQWhZSMCHlPEP3DZWcIA1OOf7 ksbcemC9NJhICNS7VkZgIDdP7ciCPNB7LR1C21aeDNVRB54mteEWf2zsmAj3xocCFGkgNV mGL7C32wzHjmc4VjWywpWMX/f0IlgpE= From: Kent Overstreet To: linux-bcachefs@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-hardening@vger.kernel.org, linux-kernel@vger.kernel.org Cc: Kent Overstreet Subject: [PATCH 3/3] bcachefs: bch_dev.io_ref -> enumerated_ref Date: Sun, 20 Apr 2025 11:59:16 -0400 Message-ID: <20250420155918.749455-4-kent.overstreet@linux.dev> In-Reply-To: <20250420155918.749455-1-kent.overstreet@linux.dev> References: <20250420155918.749455-1-kent.overstreet@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Convert device IO refs to enumerated_refs, for easier debugging of refcount issues. Simple conversion: enumerate all users and convert to the new helpers. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 18 +++---- fs/bcachefs/backpointers.c | 6 ++- fs/bcachefs/bcachefs.h | 48 +++++++++++++++++- fs/bcachefs/btree_io.c | 22 ++++---- fs/bcachefs/btree_node_scan.c | 10 ++-- fs/bcachefs/buckets.c | 4 +- fs/bcachefs/debug.c | 12 +++-- fs/bcachefs/ec.c | 19 ++++--- fs/bcachefs/enumerated_ref.h | 12 +++++ fs/bcachefs/fs-io.c | 6 ++- fs/bcachefs/io_read.c | 10 ++-- fs/bcachefs/io_write.c | 15 ++++-- fs/bcachefs/journal.c | 5 +- fs/bcachefs/journal_io.c | 15 +++--- fs/bcachefs/journal_reclaim.c | 2 +- fs/bcachefs/sb-members.h | 32 ++++++------ fs/bcachefs/super-io.c | 18 +++---- fs/bcachefs/super.c | 93 ++++++++++++++++++---------------- fs/bcachefs/super.h | 3 ++ fs/bcachefs/sysfs.c | 22 ++++---- 20 files changed, 236 insertions(+), 136 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c7e50b1835ed..37cbbf86fe3c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1953,7 +1953,7 @@ static void bch2_do_discards_work(struct work_struct = *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discard= ed, bch2_err_str(ret)); =20 - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards); enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard); } =20 @@ -1964,13 +1964,13 @@ void bch2_dev_do_discards(struct bch_dev *ca) if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard)) return; =20 - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_d= iscards)) goto put_write_ref; =20 if (queue_work(c->write_ref_wq, &ca->discard_work)) return; =20 - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards); put_write_ref: enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard); } @@ -2048,7 +2048,7 @@ static void bch2_do_discards_fast_work(struct 
work_st= ruct *work) trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.di= scarded, bch2_err_str(ret)); =20 bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_buck= et_fast); enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast); } =20 @@ -2062,13 +2062,13 @@ static void bch2_discard_one_bucket_fast(struct bch= _dev *ca, u64 bucket) if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard_fast)) return; =20 - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_discard_= one_bucket_fast)) goto put_ref; =20 if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) return; =20 - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_buck= et_fast); put_ref: enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast); } @@ -2262,8 +2262,8 @@ static void bch2_do_invalidates_work(struct work_stru= ct *work) bch2_trans_iter_exit(trans, &iter); err: bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref[WRITE]); bch2_bkey_buf_exit(&last_flushed, c); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates); enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate); } =20 @@ -2274,13 +2274,13 @@ void bch2_dev_do_invalidates(struct bch_dev *ca) if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_invalidate)) return; =20 - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_do_inval= idates)) goto put_ref; =20 if (queue_work(c->write_ref_wq, &ca->invalidate_work)) return; =20 - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates); put_ref: enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate); } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index ff26bb515150..b870eedf4fd7 100644 --- 
a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -437,7 +437,8 @@ static int check_extent_checksum(struct btree_trans *tr= ans, =20 bytes =3D p.crc.compressed_size << 9; =20 - struct bch_dev *ca =3D bch2_dev_get_ioref(c, dev, READ); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, dev, READ, + BCH_DEV_READ_REF_check_extent_checksums); if (!ca) return false; =20 @@ -474,7 +475,8 @@ static int check_extent_checksum(struct btree_trans *tr= ans, if (bio) bio_put(bio); kvfree(data_buf); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_check_extent_checksums); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index d00c320fe973..3ed5ec5949b2 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -517,6 +517,51 @@ struct discard_in_flight { u64 bucket:63; }; =20 +#define BCH_DEV_READ_REFS() \ + x(bch2_online_devs) \ + x(trans_mark_dev_sbs) \ + x(read_fua_test) \ + x(sb_field_resize) \ + x(write_super) \ + x(journal_read) \ + x(fs_journal_alloc) \ + x(fs_resize_on_mount) \ + x(btree_node_read) \ + x(btree_node_read_all_replicas) \ + x(btree_node_scrub) \ + x(btree_node_write) \ + x(btree_node_scan) \ + x(btree_verify_replicas) \ + x(btree_node_ondisk_to_text) \ + x(io_read) \ + x(check_extent_checksums) \ + x(ec_block) + +enum bch_dev_read_ref { +#define x(n) BCH_DEV_READ_REF_##n, + BCH_DEV_READ_REFS() +#undef x + BCH_DEV_READ_REF_NR, +}; + +#define BCH_DEV_WRITE_REFS() \ + x(journal_write) \ + x(journal_do_discards) \ + x(dev_do_discards) \ + x(discard_one_bucket_fast) \ + x(do_invalidates) \ + x(nocow_flush) \ + x(io_write) \ + x(ec_block) \ + x(ec_bucket_zero) + +enum bch_dev_write_ref { +#define x(n) BCH_DEV_WRITE_REF_##n, + BCH_DEV_WRITE_REFS() +#undef x + BCH_DEV_WRITE_REF_NR, +}; + struct bch_dev { struct kobject kobj; #ifdef CONFIG_BCACHEFS_DEBUG @@ -527,8 +572,7 @@ struct bch_dev { struct percpu_ref ref; #endif struct completion ref_completion; - 
struct percpu_ref io_ref[2]; - struct completion io_ref_completion[2]; + struct enumerated_ref io_ref[2]; =20 struct bch_fs *fs; =20 diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index fb20146d78d5..065fad831aae 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1325,7 +1325,7 @@ static void btree_node_read_work(struct work_struct *= work) while (1) { retry =3D true; bch_info(c, "retrying read"); - ca =3D bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ); + ca =3D bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_bt= ree_node_read); rb->have_ioref =3D ca !=3D NULL; rb->start_time =3D local_clock(); bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); @@ -1350,7 +1350,7 @@ static void btree_node_read_work(struct work_struct *= work) "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read); rb->have_ioref =3D false; =20 bch2_mark_io_failure(&failed, &rb->pick, false); @@ -1612,7 +1612,8 @@ static void btree_node_read_all_replicas_endio(struct= bio *bio) struct bch_dev *ca =3D bch2_dev_have_ref(c, rb->pick.ptr.dev); =20 bch2_latency_acct(ca, rb->start_time, READ); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_btree_node_read_all_replicas); } =20 ra->err[rb->idx] =3D bio->bi_status; @@ -1652,7 +1653,8 @@ static int btree_node_read_all_replicas(struct bch_fs= *c, struct btree *b, bool =20 i =3D 0; bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) { - struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ, + BCH_DEV_READ_REF_btree_node_read_all_replicas); struct btree_read_bio *rb =3D container_of(ra->bio[i], struct btree_read_bio, bio); rb->c =3D c; @@ -1729,7 +1731,7 @@ void bch2_btree_node_read(struct btree_trans *trans, = struct btree *b, return; } 
=20 - ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ); + ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_n= ode_read); =20 bio =3D bio_alloc_bioset(NULL, buf_pages(b->data, btree_buf_bytes(b)), @@ -1934,7 +1936,7 @@ static void btree_node_scrub_work(struct work_struct = *work) printbuf_exit(&err); bch2_bkey_buf_exit(&scrub->key, c);; btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub-= >buf); - percpu_ref_put(&scrub->ca->io_ref[READ]); + enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_= scrub); kfree(scrub); enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub); } @@ -1963,7 +1965,8 @@ int bch2_btree_node_scrub(struct btree_trans *trans, if (ret <=3D 0) goto err; =20 - struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ, + BCH_DEV_READ_REF_btree_node_scrub); if (!ca) { ret =3D -BCH_ERR_device_offline; goto err; @@ -2003,7 +2006,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans, return 0; err_free: btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); err: enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub); return ret; @@ -2172,7 +2175,8 @@ static void btree_node_write_endio(struct bio *bio) * btree writes yet (due to device removal/ro): */ if (wbio->have_ioref) - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_btree_node_write); =20 if (parent) { bio_put(bio); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 81ee7ae88a77..7bd13438d5ef 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p) err: bio_put(bio); free_page((unsigned long) buf); - percpu_ref_put(&ca->io_ref[READ]); + 
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); closure_put(w->cl); kfree(w); return 0; @@ -285,13 +285,13 @@ static int read_btree_nodes(struct find_btree_nodes *= f) =20 closure_init_stack(&cl); =20 - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) { if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree))) continue; =20 struct find_btree_nodes_worker *w =3D kmalloc(sizeof(*w), GFP_KERNEL); if (!w) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); ret =3D -ENOMEM; goto err; } @@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *= f) struct task_struct *t =3D kthread_create(read_btree_nodes_worker, w, "re= ad_btree_nodes/%s", ca->name); ret =3D PTR_ERR_OR_ZERO(t); if (ret) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); kfree(w); bch_err_msg(c, ret, "starting kthread"); break; } =20 closure_get(&cl); - percpu_ref_get(&ca->io_ref[READ]); + enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); wake_up_process(t); } err: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 09e84d4a76b5..7c267244966b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1139,10 +1139,10 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct= bch_dev *ca, int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, enum btree_iter_update_trigger_flags flags) { - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_trans_mark_dev_sbs) { int ret =3D bch2_trans_mark_dev_sb(c, ca, flags); if (ret) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_trans_mark_dev_s= bs); return ret; } } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 71d05ee7e6e3..18314329d84d 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -43,7 +43,8 @@ static bool 
bch2_btree_verify_replica(struct bch_fs *c, s= truct btree *b, struct bio *bio; bool failed =3D false, saw_error =3D false; =20 - struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ, + BCH_DEV_READ_REF_btree_verify_replicas); if (!ca) return false; =20 @@ -58,7 +59,8 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, s= truct btree *b, submit_bio_wait(bio); =20 bio_put(bio); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_btree_verify_replicas); =20 memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); =20 @@ -197,7 +199,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *ou= t, struct bch_fs *c, return; } =20 - ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ); + ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ, + BCH_DEV_READ_REF_btree_node_ondisk_to_text); if (!ca) { prt_printf(out, "error getting device to read from: not online\n"); return; @@ -298,7 +301,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *ou= t, struct bch_fs *c, if (bio) bio_put(bio); kvfree(n_ondisk); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_btree_node_ondisk_to_text); } =20 #ifdef CONFIG_DEBUG_FS diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index b4d78c0ca221..1dde0a036f5e 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -707,6 +707,9 @@ static void ec_block_endio(struct bio *bio) struct bch_dev *ca =3D ec_bio->ca; struct closure *cl =3D bio->bi_private; int rw =3D ec_bio->rw; + unsigned ref =3D rw =3D=3D READ + ? 
BCH_DEV_READ_REF_ec_block + : BCH_DEV_WRITE_REF_ec_block; =20 bch2_account_io_completion(ca, bio_data_dir(bio), ec_bio->submit_time, !bio->bi_status); @@ -728,7 +731,7 @@ static void ec_block_endio(struct bio *bio) } =20 bio_put(&ec_bio->bio); - percpu_ref_put(&ca->io_ref[rw]); + enumerated_ref_put(&ca->io_ref[rw], ref); closure_put(cl); } =20 @@ -742,8 +745,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_st= ripe_buf *buf, ? BCH_DATA_user : BCH_DATA_parity; int rw =3D op_is_write(opf); + unsigned ref =3D rw =3D=3D READ + ? BCH_DEV_READ_REF_ec_block + : BCH_DEV_WRITE_REF_ec_block; =20 - struct bch_dev *ca =3D bch2_dev_get_ioref(c, ptr->dev, rw); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, ptr->dev, rw, ref); if (!ca) { clear_bit(idx, buf->valid); return; @@ -789,14 +795,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_s= tripe_buf *buf, bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); =20 closure_get(cl); - percpu_ref_get(&ca->io_ref[rw]); + enumerated_ref_get(&ca->io_ref[rw], ref); =20 submit_bio(&ec_bio->bio); =20 offset +=3D b; } =20 - percpu_ref_put(&ca->io_ref[rw]); + enumerated_ref_put(&ca->io_ref[rw], ref); } =20 static int get_stripe_key_trans(struct btree_trans *trans, u64 idx, @@ -1253,7 +1259,8 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs = *c, unsigned block, struct open_bucket *ob) { - struct bch_dev *ca =3D bch2_dev_get_ioref(c, ob->dev, WRITE); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, ob->dev, WRITE, + BCH_DEV_WRITE_REF_ec_bucket_zero); if (!ca) { s->err =3D -BCH_ERR_erofs_no_writes; return; @@ -1269,7 +1276,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs = *c, ob->sectors_free, GFP_KERNEL, 0); =20 - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_ec_bucket_zero); =20 if (ret) s->err =3D ret; diff --git a/fs/bcachefs/enumerated_ref.h b/fs/bcachefs/enumerated_ref.h index 6d2283cf298d..d2d19da26cd5 100644 --- a/fs/bcachefs/enumerated_ref.h +++ 
b/fs/bcachefs/enumerated_ref.h @@ -38,6 +38,18 @@ static inline void enumerated_ref_put(struct enumerated_= ref *ref, unsigned idx) } #endif =20 +static inline bool enumerated_ref_is_zero(struct enumerated_ref *ref) +{ +#ifndef BCH_REFCOUNT_DEBUG + return percpu_ref_is_zero(&ref->ref); +#else + for (unsigned i =3D 0; i < ref->nr; i++) + if (atomic_long_read(&ref->refs[i])) + return false; + return true; +#endif +} + void enumerated_ref_stop_async(struct enumerated_ref *); void enumerated_ref_stop(struct enumerated_ref *, const char * const[]); void enumerated_ref_start(struct enumerated_ref *); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index a84b1baf02d8..801e9cd61a40 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -49,7 +49,8 @@ static void nocow_flush_endio(struct bio *_bio) struct nocow_flush *bio =3D container_of(_bio, struct nocow_flush, bio); =20 closure_put(bio->cl); - percpu_ref_put(&bio->ca->io_ref[WRITE]); + enumerated_ref_put(&bio->ca->io_ref[WRITE], + BCH_DEV_WRITE_REF_nocow_flush); bio_put(&bio->bio); } =20 @@ -72,7 +73,8 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { rcu_read_lock(); ca =3D rcu_dereference(c->devs[dev]); - if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE])) + if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE], + BCH_DEV_WRITE_REF_nocow_flush)) ca =3D NULL; rcu_read_unlock(); =20 diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 680f4eeea52a..2bfbeecdedba 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -411,7 +411,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struc= t bch_read_bio *rbio) =20 if (rbio->have_ioref) { struct bch_dev *ca =3D bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read); } =20 if (rbio->split) { @@ -1101,7 +1101,8 @@ int __bch2_read_extent(struct btree_trans *trans, str= uct bch_read_bio 
*orig, goto err; } =20 - struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, pick.ptr.dev, READ, + BCH_DEV_READ_REF_io_read); =20 /* * Stale dirty pointers are treated as IO errors, but @failed isn't @@ -1115,7 +1116,7 @@ int __bch2_read_extent(struct btree_trans *trans, str= uct bch_read_bio *orig, unlikely(dev_ptr_stale(ca, &pick.ptr))) { read_from_stale_dirty_pointer(trans, ca, k, pick.ptr); bch2_mark_io_failure(failed, &pick, false); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read); goto retry_pick; } =20 @@ -1148,7 +1149,8 @@ int __bch2_read_extent(struct btree_trans *trans, str= uct bch_read_bio *orig, */ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) { if (ca) - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_io_read); rbio->ret =3D -BCH_ERR_data_read_buffer_too_small; goto out_read_done; } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 755169c4e0e0..f7c5fcbfd73f 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -442,6 +442,10 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *w= bio, struct bch_fs *c, { struct bkey_ptrs_c ptrs =3D bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); struct bch_write_bio *n; + unsigned ref_rw =3D type =3D=3D BCH_DATA_btree ? READ : WRITE; + unsigned ref_idx =3D type =3D=3D BCH_DATA_btree + ? BCH_DEV_READ_REF_btree_node_write + : BCH_DEV_WRITE_REF_io_write; =20 BUG_ON(c->opts.nochanges); =20 @@ -453,7 +457,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wb= io, struct bch_fs *c, */ struct bch_dev *ca =3D nocow ? bch2_dev_have_ref(c, ptr->dev) - : bch2_dev_get_ioref(c, ptr->dev, type =3D=3D BCH_DATA_btree ? 
READ : W= RITE); + : bch2_dev_get_ioref(c, ptr->dev, ref_rw, ref_idx); =20 if (to_entry(ptr + 1) < ptrs.end) { n =3D to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOFS, &c->replica_s= et)); @@ -728,7 +732,8 @@ static void bch2_write_endio(struct bio *bio) } =20 if (wbio->have_ioref) - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], + BCH_DEV_WRITE_REF_io_write); =20 if (wbio->bounce) bch2_bio_free_pages_pool(c, bio); @@ -1325,7 +1330,8 @@ static void bch2_nocow_write(struct bch_write_op *op) /* Get iorefs before dropping btree locks: */ struct bkey_ptrs_c ptrs =3D bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca =3D bch2_dev_get_ioref(c, ptr->dev, WRITE); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, ptr->dev, WRITE, + BCH_DEV_WRITE_REF_io_write); if (unlikely(!ca)) goto err_get_ioref; =20 @@ -1427,7 +1433,8 @@ static void bch2_nocow_write(struct bch_write_op *op) return; err_get_ioref: darray_for_each(buckets, i) - percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]); + enumerated_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE], + BCH_DEV_WRITE_REF_io_write); =20 /* Fall back to COW path: */ goto out; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index e2c95192a577..f2963a6cca88 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1336,13 +1336,14 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool= new_fs) =20 int bch2_fs_journal_alloc(struct bch_fs *c) { - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_journal_alloc) { if (ca->journal.nr) continue; =20 int ret =3D bch2_dev_journal_alloc(ca, true); if (ret) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_fs_journal_alloc); return ret; } } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 50aa44665f1f..58e3983d860a 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1218,7 +1218,7 @@ 
static CLOSURE_CALLBACK(bch2_journal_read_device) out: bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); kvfree(buf.data); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_journal_read); closure_return(cl); return; err: @@ -1253,7 +1253,8 @@ int bch2_journal_read(struct bch_fs *c, =20 if ((ca->mi.state =3D=3D BCH_MEMBER_STATE_rw || ca->mi.state =3D=3D BCH_MEMBER_STATE_ro) && - percpu_ref_tryget(&ca->io_ref[READ])) + enumerated_ref_tryget(&ca->io_ref[READ], + BCH_DEV_READ_REF_journal_read)) closure_call(&ca->journal.read, bch2_journal_read_device, system_unbound_wq, @@ -1768,7 +1769,7 @@ static void journal_write_endio(struct bio *bio) } =20 closure_put(&w->io); - percpu_ref_put(&ca->io_ref[WRITE]); + enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write); } =20 static CLOSURE_CALLBACK(journal_write_submit) @@ -1779,7 +1780,8 @@ static CLOSURE_CALLBACK(journal_write_submit) unsigned sectors =3D vstruct_sectors(w->data, c->block_bits); =20 extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { - struct bch_dev *ca =3D bch2_dev_get_ioref(c, ptr->dev, WRITE); + struct bch_dev *ca =3D bch2_dev_get_ioref(c, ptr->dev, WRITE, + BCH_DEV_WRITE_REF_journal_write); if (!ca) { /* XXX: fix this */ bch_err(c, "missing device for journal write\n"); @@ -1842,8 +1844,9 @@ static CLOSURE_CALLBACK(journal_write_preflush) } =20 if (w->separate_flush) { - for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref[WRITE]); + for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_write) { + enumerated_ref_get(&ca->io_ref[WRITE], + BCH_DEV_WRITE_REF_journal_write); =20 struct journal_device *ja =3D &ca->journal; struct bio *bio =3D &ja->bio[w->idx]->bio; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 3ec4175db574..66bfb95f1ea4 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -285,7 +285,7 @@ void bch2_journal_do_discards(struct journal *j) =20 
mutex_lock(&j->discard_lock); =20 - for_each_rw_member(c, ca) { + for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) { struct journal_device *ja =3D &ca->journal; =20 while (should_discard_bucket(j, ja)) { diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 7fd971496f4f..0337e34dcb23 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -4,6 +4,7 @@ =20 #include "darray.h" #include "bkey_types.h" +#include "enumerated_ref.h" =20 extern char * const bch2_member_error_strs[]; =20 @@ -20,7 +21,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, i= nt i); =20 static inline bool bch2_dev_is_online(struct bch_dev *ca) { - return !percpu_ref_is_zero(&ca->io_ref[READ]); + return !enumerated_ref_is_zero(&ca->io_ref[READ]); } =20 static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned); @@ -163,33 +164,33 @@ static inline struct bch_dev *bch2_get_next_dev(struc= t bch_fs *c, struct bch_dev static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, struct bch_dev *ca, unsigned state_mask, - int rw) + int rw, unsigned ref_idx) { rcu_read_lock(); if (ca) - percpu_ref_put(&ca->io_ref[rw]); + enumerated_ref_put(&ca->io_ref[rw], ref_idx); =20 while ((ca =3D __bch2_next_dev(c, ca, NULL)) && (!((1 << ca->mi.state) & state_mask) || - !percpu_ref_tryget(&ca->io_ref[rw]))) + !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))) ; rcu_read_unlock(); =20 return ca; } =20 -#define __for_each_online_member(_c, _ca, state_mask, rw) \ +#define __for_each_online_member(_c, _ca, state_mask, rw, ref_idx) \ for (struct bch_dev *_ca =3D NULL; \ - (_ca =3D bch2_get_next_online_dev(_c, _ca, state_mask, rw));) + (_ca =3D bch2_get_next_online_dev(_c, _ca, state_mask, rw, ref_idx))= ;) =20 -#define for_each_online_member(c, ca) \ - __for_each_online_member(c, ca, ~0, READ) +#define for_each_online_member(c, ca, ref_idx) \ + __for_each_online_member(c, ca, ~0, READ, ref_idx) =20 -#define for_each_rw_member(c, ca) \ - 
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE) +#define for_each_rw_member(c, ca, ref_idx) \ + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE, ref_idx) =20 -#define for_each_readable_member(c, ca) \ - __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_= STATE_ro), READ) +#define for_each_readable_member(c, ca, ref_idx) \ + __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_= STATE_ro), READ, ref_idx) =20 static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev) { @@ -290,13 +291,14 @@ static inline struct bch_dev *bch2_dev_iterate(struct= bch_fs *c, struct bch_dev return bch2_dev_tryget(c, dev_idx); } =20 -static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigne= d dev, int rw) +static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigne= d dev, + int rw, unsigned ref_idx) { might_sleep(); =20 rcu_read_lock(); struct bch_dev *ca =3D bch2_dev_rcu(c, dev); - if (ca && !percpu_ref_tryget(&ca->io_ref[rw])) + if (ca && !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx)) ca =3D NULL; rcu_read_unlock(); =20 @@ -306,7 +308,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct= bch_fs *c, unsigned dev, return ca; =20 if (ca) - percpu_ref_put(&ca->io_ref[rw]); + enumerated_ref_put(&ca->io_ref[rw], ref_idx); return NULL; } =20 diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 872707e5fa95..d53cbc5f9925 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -260,11 +260,11 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct b= ch_sb_handle *sb, =20 /* XXX: we're not checking that offline device have enough space */ =20 - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_field_resize) { struct bch_sb_handle *dev_sb =3D &ca->disk_sb; =20 if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], 
BCH_DEV_READ_REF_sb_field_resize= ); return NULL; } } @@ -967,7 +967,7 @@ static void write_super_endio(struct bio *bio) } =20 closure_put(&ca->fs->sb_write); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super); } =20 static void read_back_super(struct bch_fs *c, struct bch_dev *ca) @@ -985,7 +985,7 @@ static void read_back_super(struct bch_fs *c, struct bc= h_dev *ca) =20 this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); =20 - percpu_ref_get(&ca->io_ref[READ]); + enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super); closure_bio_submit(bio, &c->sb_write); } =20 @@ -1011,7 +1011,7 @@ static void write_one_super(struct bch_fs *c, struct = bch_dev *ca, unsigned idx) this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], bio_sectors(bio)); =20 - percpu_ref_get(&ca->io_ref[READ]); + enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super); closure_bio_submit(bio, &c->sb_write); } =20 @@ -1043,13 +1043,13 @@ int bch2_write_super(struct bch_fs *c) * For now, we expect to be able to call write_super() when we're not * yet RW: */ - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_write_super) { ret =3D darray_push(&online_devices, ca); if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", = __func__)) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super); goto out; } - percpu_ref_get(&ca->io_ref[READ]); + enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super); } =20 /* Make sure we're using the new magic numbers: */ @@ -1216,7 +1216,7 @@ int bch2_write_super(struct bch_fs *c) /* Make new options visible after they're persistent: */ bch2_sb_update(c); darray_for_each(online_devices, ca) - percpu_ref_put(&(*ca)->io_ref[READ]); + enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super); darray_exit(&online_devices); printbuf_exit(&err); return 
ret; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 288e00779fff..bfb03213d4a5 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -78,13 +78,28 @@ MODULE_DESCRIPTION("bcachefs filesystem"); =20 typedef DARRAY(struct bch_sb_handle) bch_sb_handles; =20 -const char * const bch2_fs_flag_strs[] =3D { #define x(n) #n, +const char * const bch2_fs_flag_strs[] =3D { BCH_FS_FLAGS() -#undef x NULL }; =20 +const char * const bch2_write_refs[] =3D { + BCH_WRITE_REFS() + NULL +}; + +const char * const bch2_dev_read_refs[] =3D { + BCH_DEV_READ_REFS() + NULL +}; + +const char * const bch2_dev_write_refs[] =3D { + BCH_DEV_WRITE_REFS() + NULL +}; +#undef x + static void __bch2_print_str(struct bch_fs *c, const char *prefix, const char *str, bool nonblocking) { @@ -490,7 +505,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool = early) for_each_online_member_rcu(c, ca) if (ca->mi.state =3D=3D BCH_MEMBER_STATE_rw) { bch2_dev_allocator_add(c, ca); - percpu_ref_reinit(&ca->io_ref[WRITE]); + enumerated_ref_start(&ca->io_ref[WRITE]); } rcu_read_unlock(); =20 @@ -656,6 +671,12 @@ void __bch2_fs_stop(struct bch_fs *c) bch2_fs_read_only(c); up_write(&c->state_lock); =20 + for (unsigned i =3D 0; i < c->sb.nr_devices; i++) { + struct bch_dev *ca =3D rcu_dereference_protected(c->devs[i], true); + if (ca) + bch2_dev_io_ref_stop(ca, READ); + } + for_each_member_device(c, ca) bch2_dev_unlink(ca); =20 @@ -684,8 +705,6 @@ void __bch2_fs_stop(struct bch_fs *c) =20 void bch2_fs_free(struct bch_fs *c) { - unsigned i; - mutex_lock(&bch_fs_list_lock); list_del(&c->list); mutex_unlock(&bch_fs_list_lock); @@ -693,7 +712,7 @@ void bch2_fs_free(struct bch_fs *c) closure_sync(&c->cl); closure_debug_destroy(&c->cl); =20 - for (i =3D 0; i < c->sb.nr_devices; i++) { + for (unsigned i =3D 0; i < c->sb.nr_devices; i++) { struct bch_dev *ca =3D rcu_dereference_protected(c->devs[i], true); =20 if (ca) { @@ -1261,11 +1280,11 @@ static void bch2_dev_io_ref_stop(struct bch_dev *ca= , 
int rw) if (rw =3D=3D READ) clear_bit(ca->dev_idx, ca->fs->online_devs.d); =20 - if (!percpu_ref_is_zero(&ca->io_ref[rw])) { - reinit_completion(&ca->io_ref_completion[rw]); - percpu_ref_kill(&ca->io_ref[rw]); - wait_for_completion(&ca->io_ref_completion[rw]); - } + if (!enumerated_ref_is_zero(&ca->io_ref[rw])) + enumerated_ref_stop(&ca->io_ref[rw], + rw =3D=3D READ + ? bch2_dev_read_refs + : bch2_dev_write_refs); } =20 static void bch2_dev_release(struct kobject *kobj) @@ -1277,8 +1296,8 @@ static void bch2_dev_release(struct kobject *kobj) =20 static void bch2_dev_free(struct bch_dev *ca) { - WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE])); - WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ])); + WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE])); + WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[READ])); =20 cancel_work_sync(&ca->io_error_work); =20 @@ -1298,8 +1317,8 @@ static void bch2_dev_free(struct bch_dev *ca) bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); =20 - percpu_ref_exit(&ca->io_ref[WRITE]); - percpu_ref_exit(&ca->io_ref[READ]); + enumerated_ref_exit(&ca->io_ref[WRITE]); + enumerated_ref_exit(&ca->io_ref[READ]); #ifndef CONFIG_BCACHEFS_DEBUG percpu_ref_exit(&ca->ref); #endif @@ -1311,7 +1330,7 @@ static void __bch2_dev_offline(struct bch_fs *c, stru= ct bch_dev *ca) =20 lockdep_assert_held(&c->state_lock); =20 - if (percpu_ref_is_zero(&ca->io_ref[READ])) + if (enumerated_ref_is_zero(&ca->io_ref[READ])) return; =20 __bch2_dev_read_only(c, ca); @@ -1333,20 +1352,6 @@ static void bch2_dev_ref_complete(struct percpu_ref = *ref) } #endif =20 -static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref) -{ - struct bch_dev *ca =3D container_of(ref, struct bch_dev, io_ref[READ]); - - complete(&ca->io_ref_completion[READ]); -} - -static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref) -{ - struct bch_dev *ca =3D container_of(ref, struct bch_dev, io_ref[WRITE]); - - 
complete(&ca->io_ref_completion[WRITE]); -} - static void bch2_dev_unlink(struct bch_dev *ca) { struct kobject *b; @@ -1408,8 +1413,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs= *c, =20 kobject_init(&ca->kobj, &bch2_dev_ktype); init_completion(&ca->ref_completion); - init_completion(&ca->io_ref_completion[READ]); - init_completion(&ca->io_ref_completion[WRITE]); =20 INIT_WORK(&ca->io_error_work, bch2_io_error_work); =20 @@ -1435,10 +1438,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_f= s *c, =20 bch2_dev_allocator_background_init(ca); =20 - if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete, - PERCPU_REF_INIT_DEAD, GFP_KERNEL) || - percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete, - PERCPU_REF_INIT_DEAD, GFP_KERNEL) || + if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) || + enumerated_ref_init(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_NR, NULL) || !(ca->sb_read_scratch =3D kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_K= ERNEL)) || bch2_dev_buckets_alloc(c, ca) || !(ca->io_done =3D alloc_percpu(*ca->io_done))) @@ -1500,8 +1501,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca,= struct bch_sb_handle *sb) return -BCH_ERR_device_size_too_small; } =20 - BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ])); - BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE])); + BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ])); + BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE])); =20 ret =3D bch2_dev_journal_init(ca, sb->sb); if (ret) @@ -1520,7 +1521,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca,= struct bch_sb_handle *sb) =20 ca->dev =3D ca->disk_sb.bdev->bd_dev; =20 - percpu_ref_reinit(&ca->io_ref[READ]); + enumerated_ref_start(&ca->io_ref[READ]); =20 return 0; } @@ -1667,8 +1668,8 @@ static void __bch2_dev_read_write(struct bch_fs *c, s= truct bch_dev *ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); =20 - if (percpu_ref_is_zero(&ca->io_ref[WRITE])) - 
percpu_ref_reinit(&ca->io_ref[WRITE]); + if (enumerated_ref_is_zero(&ca->io_ref[WRITE])) + enumerated_ref_start(&ca->io_ref[WRITE]); =20 bch2_dev_do_discards(ca); } @@ -1818,7 +1819,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev = *ca, int flags) err: if (test_bit(BCH_FS_rw, &c->flags) && ca->mi.state =3D=3D BCH_MEMBER_STATE_rw && - !percpu_ref_is_zero(&ca->io_ref[READ])) + !enumerated_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); up_write(&c->state_lock); return ret; @@ -2119,7 +2120,7 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) { down_write(&c->state_lock); =20 - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) { u64 old_nbuckets =3D ca->mi.nbuckets; u64 new_nbuckets =3D div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk), ca->mi.bucket_size); @@ -2130,7 +2131,8 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) int ret =3D bch2_dev_buckets_resize(c, ca, new_nbuckets); bch_err_fn(ca, ret); if (ret) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_fs_resize_on_mount); up_write(&c->state_lock); return ret; } @@ -2148,7 +2150,8 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) if (ca->mi.freespace_initialized) { ret =3D __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets); if (ret) { - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_fs_resize_on_mount); up_write(&c->state_lock); return ret; } diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 502d6c57ebb2..dbf59547f67c 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -9,6 +9,9 @@ #include =20 extern const char * const bch2_fs_flag_strs[]; +extern const char * const bch2_write_refs[]; +extern const char * const bch2_dev_read_refs[]; +extern const char * const bch2_dev_write_refs[]; =20 struct bch_fs *bch2_dev_to_fs(dev_t); struct bch_fs *bch2_uuid_to_fs(__uuid_t); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c 
index 58be32bbd49c..7c840b470094 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -178,14 +178,9 @@ read_attribute(btree_reserve_cache); read_attribute(open_buckets); read_attribute(open_buckets_partial); read_attribute(nocow_lock_table); -read_attribute(write_refs); =20 -static const char * const bch2_write_refs[] =3D { -#define x(n) #n, - BCH_WRITE_REFS() -#undef x - NULL -}; +read_attribute(read_refs); +read_attribute(write_refs); =20 read_attribute(internal_uuid); read_attribute(disk_groups); @@ -314,7 +309,7 @@ static int bch2_read_fua_test(struct printbuf *out, str= uct bch_dev *ca) bch2_time_stats_init_no_pcpu(&stats_fua); bch2_time_stats_init_no_pcpu(&stats_random); =20 - if (!bch2_dev_get_ioref(c, ca->dev_idx, READ)) { + if (!bch2_dev_get_ioref(c, ca->dev_idx, READ, BCH_DEV_READ_REF_read_fua_t= est)) { prt_str(out, "offline\n"); return 0; } @@ -405,7 +400,7 @@ static int bch2_read_fua_test(struct printbuf *out, str= uct bch_dev *ca) err: kfree(buf); kfree(bio); - percpu_ref_put(&ca->io_ref[READ]); + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_read_fua_test); bch_err_fn(c, ret); return ret; } @@ -905,6 +900,12 @@ SHOW(bch2_dev) if (opt_id >=3D 0) return sysfs_opt_show(c, ca, opt_id, out); =20 + if (attr =3D=3D &sysfs_read_refs) + enumerated_ref_to_text(out, &ca->io_ref[READ], bch2_dev_read_refs); + + if (attr =3D=3D &sysfs_write_refs) + enumerated_ref_to_text(out, &ca->io_ref[WRITE], bch2_dev_write_refs); + return 0; } =20 @@ -962,6 +963,9 @@ struct attribute *bch2_dev_files[] =3D { /* debug: */ &sysfs_alloc_debug, &sysfs_open_buckets, + + &sysfs_read_refs, + &sysfs_write_refs, NULL }; =20 --=20 2.49.0