bcachefs: fix goto jumping over guard initializers

[PATCH bcachefs-testing] bcachefs: fix goto jumping over guard initializers

Posted by Bharadwaj Raju 8 months, 2 weeks ago

Add scope to guard mutexes in bch2_accounting_read
and __bch2_fsck_err to prevent the unlock func
from running on an uninitialized value when
goto skips over the guard macro. This also makes
it compile on Clang 19.

Fixes: 96aae449b7083 ("bcachefs: convert percpu rwsems to guards")
Fixes: 5582ffabeae15 ("bcachefs: Replace mutex_lock() with guards")

Signed-off-by: Bharadwaj Raju <bharadwaj.raju777@gmail.com>
---
 fs/bcachefs/disk_accounting.c |  63 +++++-----
 fs/bcachefs/error.c           | 230 +++++++++++++++++-----------------
 2 files changed, 149 insertions(+), 144 deletions(-)

diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 825420845cb8..3650b28ad48f 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -832,44 +832,47 @@ int bch2_accounting_read(struct bch_fs *c)
 	}
 	keys->gap = keys->nr = dst - keys->data;
 
-	guard(percpu_write)(&c->mark_lock);
+	{
+		guard(percpu_write)(&c->mark_lock);
 
-	darray_for_each_reverse(acc->k, i) {
-		struct disk_accounting_pos acc_k;
-		bpos_to_disk_accounting_pos(&acc_k, i->pos);
+		darray_for_each_reverse(acc->k, i) {
+			struct disk_accounting_pos acc_k;
+			bpos_to_disk_accounting_pos(&acc_k, i->pos);
 
-		u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
-		memset(v, 0, sizeof(v));
+			u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+			memset(v, 0, sizeof(v));
 
-		for (unsigned j = 0; j < i->nr_counters; j++)
-			v[j] = percpu_u64_get(i->v[0] + j);
+			for (unsigned j = 0; j < i->nr_counters; j++)
+				v[j] = percpu_u64_get(i->v[0] + j);
 
-		/*
-		 * If the entry counters are zeroed, it should be treated as
-		 * nonexistent - it might point to an invalid device.
-		 *
-		 * Remove it, so that if it's re-added it gets re-marked in the
-		 * superblock:
-		 */
-		ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
-			? -BCH_ERR_remove_disk_accounting_entry
-			: bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
-
-		if (ret == -BCH_ERR_remove_disk_accounting_entry) {
-			free_percpu(i->v[0]);
-			free_percpu(i->v[1]);
-			darray_remove_item(&acc->k, i);
-			ret = 0;
-			continue;
+			/*
+			 * If the entry counters are zeroed, it should be treated as
+			 * nonexistent - it might point to an invalid device.
+			 *
+			 * Remove it, so that if it's re-added it gets re-marked in the
+			 * superblock:
+			 */
+			ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) ?
+							      -BCH_ERR_remove_disk_accounting_entry :
+							      bch2_disk_accounting_validate_late(
+								      trans, &acc_k, v, i->nr_counters);
+
+			if (ret == -BCH_ERR_remove_disk_accounting_entry) {
+				free_percpu(i->v[0]);
+				free_percpu(i->v[1]);
+				darray_remove_item(&acc->k, i);
+				ret = 0;
+				continue;
+			}
+
+			if (ret)
+				goto fsck_err;
 		}
 
-		if (ret)
-			goto fsck_err;
+		eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+				accounting_pos_cmp, NULL);
 	}
 
-	eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
-			accounting_pos_cmp, NULL);
-
 	scoped_guard(preempt) {
 		struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
 
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index dc0fbc97a0e5..be16a4b56a15 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -497,131 +497,133 @@ int __bch2_fsck_err(struct bch_fs *c,
 			}
 		}
 	}
-
-	guard(mutex)(&c->fsck_error_msgs_lock);
-	bool repeat = false, print = true, suppress = false;
-	bool inconsistent = false, exiting = false;
-	struct fsck_err_state *s =
-		count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress);
-	if (repeat) {
-		ret = s->ret;
-		goto err;
-	}
-
-	if ((flags & FSCK_AUTOFIX) &&
-	    (c->opts.errors == BCH_ON_ERROR_continue ||
-	     c->opts.errors == BCH_ON_ERROR_fix_safe)) {
-		prt_str(out, ", ");
-		if (flags & FSCK_CAN_FIX) {
-			prt_actioning(out, action);
-			ret = -BCH_ERR_fsck_fix;
-		} else {
-			prt_str(out, ", continuing");
-			ret = -BCH_ERR_fsck_ignore;
+	{
+		guard(mutex)(&c->fsck_error_msgs_lock);
+		bool repeat = false, print = true, suppress = false;
+		bool inconsistent = false, exiting = false;
+		struct fsck_err_state *s =
+			count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress);
+		if (repeat) {
+			ret = s->ret;
+			goto err;
 		}
 
-		goto print;
-	} else if (!test_bit(BCH_FS_in_fsck, &c->flags)) {
-		if (c->opts.errors != BCH_ON_ERROR_continue ||
-		    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
-			prt_str_indented(out, ", shutting down\n"
-					 "error not marked as autofix and not in fsck\n"
-					 "run fsck, and forward to devs so error can be marked for self-healing");
-			inconsistent = true;
-			print = true;
+		if ((flags & FSCK_AUTOFIX) &&
+		    (c->opts.errors == BCH_ON_ERROR_continue ||
+		     c->opts.errors == BCH_ON_ERROR_fix_safe)) {
+			prt_str(out, ", ");
+			if (flags & FSCK_CAN_FIX) {
+				prt_actioning(out, action);
+				ret = -BCH_ERR_fsck_fix;
+			} else {
+				prt_str(out, ", continuing");
+				ret = -BCH_ERR_fsck_ignore;
+			}
+
+			goto print;
+		} else if (!test_bit(BCH_FS_in_fsck, &c->flags)) {
+			if (c->opts.errors != BCH_ON_ERROR_continue ||
+			    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
+				prt_str_indented(out, ", shutting down\n"
+						 "error not marked as autofix and not in fsck\n"
+						 "run fsck, and forward to devs so error can be marked for self-healing");
+				inconsistent = true;
+				print = true;
+				ret = -BCH_ERR_fsck_errors_not_fixed;
+			} else if (flags & FSCK_CAN_FIX) {
+				prt_str(out, ", ");
+				prt_actioning(out, action);
+				ret = -BCH_ERR_fsck_fix;
+			} else {
+				prt_str(out, ", continuing");
+				ret = -BCH_ERR_fsck_ignore;
+			}
+		} else if (c->opts.fix_errors == FSCK_FIX_exit) {
+			prt_str(out, ", exiting");
 			ret = -BCH_ERR_fsck_errors_not_fixed;
 		} else if (flags & FSCK_CAN_FIX) {
-			prt_str(out, ", ");
-			prt_actioning(out, action);
-			ret = -BCH_ERR_fsck_fix;
-		} else {
-			prt_str(out, ", continuing");
-			ret = -BCH_ERR_fsck_ignore;
+			int fix = s && s->fix
+				? s->fix
+				: c->opts.fix_errors;
+
+			if (fix == FSCK_FIX_ask) {
+				print = false;
+
+				ret = do_fsck_ask_yn(c, trans, out, action);
+				if (ret < 0)
+					goto err;
+
+				if (ret >= YN_ALLNO && s)
+					s->fix = ret == YN_ALLNO
+						? FSCK_FIX_no
+						: FSCK_FIX_yes;
+
+				ret = ret & 1
+					? -BCH_ERR_fsck_fix
+					: -BCH_ERR_fsck_ignore;
+			} else if (fix == FSCK_FIX_yes ||
+				   (c->opts.nochanges &&
+				    !(flags & FSCK_CAN_IGNORE))) {
+				prt_str(out, ", ");
+				prt_actioning(out, action);
+				ret = -BCH_ERR_fsck_fix;
+			} else {
+				prt_str(out, ", not ");
+				prt_actioning(out, action);
+			}
+		} else if (!(flags & FSCK_CAN_IGNORE)) {
+			prt_str(out, " (repair unimplemented)");
 		}
-	} else if (c->opts.fix_errors == FSCK_FIX_exit) {
-		prt_str(out, ", exiting");
-		ret = -BCH_ERR_fsck_errors_not_fixed;
-	} else if (flags & FSCK_CAN_FIX) {
-		int fix = s && s->fix
-			? s->fix
-			: c->opts.fix_errors;
-
-		if (fix == FSCK_FIX_ask) {
-			print = false;
-
-			ret = do_fsck_ask_yn(c, trans, out, action);
-			if (ret < 0)
-				goto err;
 
-			if (ret >= YN_ALLNO && s)
-				s->fix = ret == YN_ALLNO
-					? FSCK_FIX_no
-					: FSCK_FIX_yes;
-
-			ret = ret & 1
-				? -BCH_ERR_fsck_fix
-				: -BCH_ERR_fsck_ignore;
-		} else if (fix == FSCK_FIX_yes ||
-			   (c->opts.nochanges &&
-			    !(flags & FSCK_CAN_IGNORE))) {
-			prt_str(out, ", ");
-			prt_actioning(out, action);
-			ret = -BCH_ERR_fsck_fix;
-		} else {
-			prt_str(out, ", not ");
-			prt_actioning(out, action);
-		}
-	} else if (!(flags & FSCK_CAN_IGNORE)) {
-		prt_str(out, " (repair unimplemented)");
-	}
+		if (ret == -BCH_ERR_fsck_ignore &&
+		    (c->opts.fix_errors == FSCK_FIX_exit ||
+		     !(flags & FSCK_CAN_IGNORE)))
+			ret = -BCH_ERR_fsck_errors_not_fixed;
 
-	if (ret == -BCH_ERR_fsck_ignore &&
-	    (c->opts.fix_errors == FSCK_FIX_exit ||
-	     !(flags & FSCK_CAN_IGNORE)))
-		ret = -BCH_ERR_fsck_errors_not_fixed;
+		if (test_bit(BCH_FS_in_fsck, &c->flags) &&
+		    (ret != -BCH_ERR_fsck_fix &&
+		     ret != -BCH_ERR_fsck_ignore)) {
+			exiting = true;
+			print = true;
+		}
 
-	if (test_bit(BCH_FS_in_fsck, &c->flags) &&
-	    (ret != -BCH_ERR_fsck_fix &&
-	     ret != -BCH_ERR_fsck_ignore)) {
-		exiting = true;
-		print = true;
-	}
 print:
-	prt_newline(out);
-
-	if (inconsistent)
-		__bch2_inconsistent_error(c, out);
-	else if (exiting)
-		prt_printf(out, "Unable to continue, halting\n");
-	else if (suppress)
-		prt_printf(out, "Ratelimiting new instances of previous error\n");
-
-	if (print) {
-		/* possibly strip an empty line, from printbuf_indent_add */
-		while (out->pos && out->buf[out->pos - 1] == ' ')
-			--out->pos;
-		printbuf_nul_terminate(out);
-
-		if (bch2_fs_stdio_redirect(c))
-			bch2_print(c, "%s", out->buf);
-		else
-			bch2_print_str(c, KERN_ERR, out->buf);
-	}
+		prt_newline(out);
+
+		if (inconsistent)
+			__bch2_inconsistent_error(c, out);
+		else if (exiting)
+			prt_printf(out, "Unable to continue, halting\n");
+		else if (suppress)
+			prt_printf(out, "Ratelimiting new instances of previous error\n");
+
+		if (print) {
+			/* possibly strip an empty line, from printbuf_indent_add */
+			while (out->pos && out->buf[out->pos - 1] == ' ')
+				--out->pos;
+			printbuf_nul_terminate(out);
+
+			if (bch2_fs_stdio_redirect(c))
+				bch2_print(c, "%s", out->buf);
+			else
+				bch2_print_str(c, KERN_ERR, out->buf);
+		}
 
-	if (s)
-		s->ret = ret;
+		if (s)
+			s->ret = ret;
 
-	/*
-	 * We don't yet track whether the filesystem currently has errors, for
-	 * log_fsck_err()s: that would require us to track for every error type
-	 * which recovery pass corrects it, to get the fsck exit status correct:
-	 */
-	if (flags & FSCK_CAN_FIX) {
-		if (ret == -BCH_ERR_fsck_fix) {
-			set_bit(BCH_FS_errors_fixed, &c->flags);
-		} else {
-			set_bit(BCH_FS_errors_not_fixed, &c->flags);
-			set_bit(BCH_FS_error, &c->flags);
+		/*
+		 * We don't yet track whether the filesystem currently has errors, for
+		 * log_fsck_err()s: that would require us to track for every error type
+		 * which recovery pass corrects it, to get the fsck exit status correct:
+		 */
+		if (flags & FSCK_CAN_FIX) {
+			if (ret == -BCH_ERR_fsck_fix) {
+				set_bit(BCH_FS_errors_fixed, &c->flags);
+			} else {
+				set_bit(BCH_FS_errors_not_fixed, &c->flags);
+				set_bit(BCH_FS_error, &c->flags);
+			}
 		}
 	}
 err:
-- 
2.49.0

Re: [PATCH bcachefs-testing] bcachefs: fix goto jumping over guard initializers

Posted by Bharadwaj Raju 8 months, 2 weeks ago

Now I'm actually not sure this is the best solution -- the comments in
include/linux/cleanup.h say that gotos and cleanup helpers should never be
mixed in the same function in the first place. So ideally we should not use
these guard helpers until we develop helpers for everything that
we want to clean up using gotos, like printbuf_exit, bch2_trans_put,
and so on, and then switch over everything at once.

Re: [PATCH bcachefs-testing] bcachefs: fix goto jumping over guard initializers

Posted by Kent Overstreet 8 months, 2 weeks ago

On Tue, May 27, 2025 at 11:09:00PM +0530, Bharadwaj Raju wrote:
> Now I'm actually not sure this is the best solution -- the comments in
> include/linux/cleanup.h say that gotos and cleanup helpers should never be
> mixed in the same function in the first place. So ideally we should not use
> these guard helpers until we develop helpers for everything that
> we want to clean up using gotos, like printbuf_exit, bch2_trans_put,
> and so on, and then switch over everything at once.

I believe that's too strict; a goto that jumps after a CLASS()
initialized object in the same scope clearly doesn't work, and probably
the same is true for a goto that jumps backwards over a CLASS()
initialization in the same scope (doesn't come up as much, haven't
checked).

But a goto that works like a break statement - jumping forwards out of a
scope with a CLASS() object should be fine, and likewise for gotos that
stay in the same scope and don't jump over CLASS() object initializers.

(gotos that jump backwards out of scopes, or forwards into scopes, are
really problematic for compilers to model; 'break' style gotos are much
easier for them).

So we should be ok with those rare cases fixed, and I'm running the full
test suite on this series with lockdep/kasan/leak checking enabled.

Agreed that it would be good to get everything converted. I've already
done printbufs and btree_trans objects, but we still need darrays,
wrappers for various device/filesystem refcounts (and the
for_aach_device() macros might be tricky to convert, but worthwhile),
and btree iterators.

Btree iterators have an exit function that takes a pointer to
btree_trans as well, and it didn't look like DEFINE_CLASS() could handle
that directly - will have to look at how the lock/unlock guards work.