From nobody Sun Feb 8 20:29:12 2026 Received: from out-172.mta0.migadu.com (out-172.mta0.migadu.com [91.218.175.172]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8DA431F4E34 for ; Fri, 2 May 2025 20:00:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746216020; cv=none; b=oqzESFVmpLDqCM1ogUtpVeHfoItcKy4ys4Eb1DMEMJyVVPC0v6AxKV4JqnXc8uzsXNstZ/ta35CV1XDEbbqfGFWQLsSI6QjsIUWIHGq9eSYxT1O6EKODZplspDjvoTP3S0JCuBKainAtTSG9EMNgZatZYHgkmtTXi39PzEGLPb0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746216020; c=relaxed/simple; bh=x9+BBsUWn5CT6heUB5Cn0XLl9xnBkRFy8cicf5/iK1w=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=EaDrOmi2Ao6ykhDy2wNvpIwoIVJRb5KYQPUOSBTh1LbEo19DVuVQAP0p6k5qh5/30tadGaHmyM3UvklH8tCYU2JPqh1de+nqSX1tYiwJ4SiKGxsHpKX3aC13NabI5VhzcPRtzdsv6hoYbAqBYZMSg2gxy4cMaOci6Nv1a4uFAEA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=Z/BPyKfG; arc=none smtp.client-ip=91.218.175.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="Z/BPyKfG" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1746216014; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ovDVNBJHIHUHnzYc0hryULdGfBYXL9xjKAwOPlwvfkw=; b=Z/BPyKfGQ0BpXJOwC2hKF4LUf8P/GRdFXO/7xOlMxdsYclhrIm0PNWqdhnHlG3bTPKccvu pUEjHwjIHET+RmGZnVdplBgY7Y7sh8Ys5KL23kTIwleDpT0CQVwoUJGIMaVN3gK/xSvnv+ sl4tNYmH66kpOfSyh+JNiqupAiCrMe0= From: Kent Overstreet To: linux-bcachefs@vger.kernel.org, linux-kernel@vger.kernel.org Cc: Kent Overstreet Subject: [PATCH 7/8] bcachefs: bcachefs_metadata_version_snapshot_deletion_v2 Date: Fri, 2 May 2025 15:59:59 -0400 Message-ID: <20250502200002.1309862-8-kent.overstreet@linux.dev> In-Reply-To: <20250502200002.1309862-1-kent.overstreet@linux.dev> References: <20250502200002.1309862-1-kent.overstreet@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" We're going to be speeding up snapshot deletion by only having it process the extents/dirents/xattrs btrees if an inode of a given snapshot ID was present. This raises the possibility of 'bkey_in_missing_snapshot' errors popping up, if we ever accidentally don't do the corresponding inode update, or if the new algorithm has bugs. So instead of deleting snapshot IDs, add a new deleted flag, so that fsck can distinguish a key in a deleted snapshot from a key in a missing snapshot, and so can more definitively tell what happened and automatically repair.
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/sb-errors_format.h | 3 +- fs/bcachefs/snapshot.c | 80 ++++++++++++++++++++++++++-------- fs/bcachefs/snapshot.h | 25 ++++++++--- fs/bcachefs/snapshot_format.h | 2 +- fs/bcachefs/snapshot_types.h | 30 +++++++++++++ fs/bcachefs/subvolume_types.h | 27 ------------ 7 files changed, 116 insertions(+), 54 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 7ce475c565b5..0beff6af7ecf 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -695,7 +695,8 @@ struct bch_sb_field_ext { x(stripe_backpointers, BCH_VERSION(1, 22)) \ x(stripe_lru, BCH_VERSION(1, 23)) \ x(casefolding, BCH_VERSION(1, 24)) \ - x(extent_flags, BCH_VERSION(1, 25)) + x(extent_flags, BCH_VERSION(1, 25)) \ + x(snapshot_deletion_v2, BCH_VERSION(1, 26)) =20 enum bcachefs_metadata_version { bcachefs_metadata_version_min =3D 9, diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 82bc1906aa00..448326c01d13 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -209,6 +209,7 @@ enum bch_fsck_flags { x(subvol_to_missing_root, 188, 0) \ x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ + x(bkey_in_deleted_snapshot, 315, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ x(inode_alloc_cursor_inode_bad, 301, 0) \ @@ -324,7 +325,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 315, 0) + x(MAX, 316, 0) =20 enum bch_sb_error_id { #define x(t, n, ...) 
BCH_FSCK_ERR_##t =3D n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 7349f7f33a4f..f074b9de5024 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -314,7 +314,9 @@ static int __bch2_mark_snapshot(struct btree_trans *tra= ns, if (new.k->type =3D=3D KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s =3D bkey_s_c_to_snapshot(new); =20 - t->live =3D true; + t->state =3D !BCH_SNAPSHOT_DELETED(s.v) + ? SNAPSHOT_ID_live + : SNAPSHOT_ID_deleted; t->parent =3D le32_to_cpu(s.v->parent); t->children[0] =3D le32_to_cpu(s.v->children[0]); t->children[1] =3D le32_to_cpu(s.v->children[1]); @@ -711,6 +713,9 @@ static int check_snapshot(struct btree_trans *trans, memset(&s, 0, sizeof(s)); memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); =20 + if (BCH_SNAPSHOT_DELETED(&s)) + return 0; + id =3D le32_to_cpu(s.parent); if (id) { ret =3D bch2_snapshot_lookup(trans, id, &v); @@ -998,7 +1003,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) snapshot_id_list_to_text(&buf, t); =20 darray_for_each(*t, id) { - if (fsck_err_on(!bch2_snapshot_exists(c, *id), + if (fsck_err_on(bch2_snapshot_id_state(c, *id) =3D=3D SNAPSHOT_ID_empty, trans, snapshot_node_missing, "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) { if (t->nr > 1) { @@ -1023,22 +1028,38 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) return ret; } =20 -int bch2_check_key_has_snapshot(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) +int __bch2_check_key_has_snapshot(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) { struct bch_fs *c =3D trans->c; struct printbuf buf =3D PRINTBUF; int ret =3D 0; + enum snapshot_id_state state =3D bch2_snapshot_id_state(c, k.k->p.snapsho= t); + + /* Snapshot was definitively deleted, this error is marked autofix */ + if (fsck_err_on(state =3D=3D SNAPSHOT_ID_deleted, + trans, bkey_in_deleted_snapshot, + "key in deleted snapshot %s, delete?", + (bch2_btree_id_to_text(&buf, 
iter->btree_id), + prt_char(&buf, ' '), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret =3D bch2_btree_delete_at(trans, iter, + BTREE_UPDATE_internal_snapshot_node) ?: 1; =20 - if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot), + /* + * Snapshot missing: we should have caught this with btree_lost_data and + * kicked off reconstruct_snapshots, so if we end up here we have no + * idea what happened: + */ + if (fsck_err_on(state =3D=3D SNAPSHOT_ID_empty, trans, bkey_in_missing_snapshot, "key in missing snapshot %s, delete?", (bch2_btree_id_to_text(&buf, iter->btree_id), prt_char(&buf, ' '), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret =3D bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node) ?: 1; + BTREE_UPDATE_internal_snapshot_node) ?: 1; fsck_err: printbuf_exit(&buf); return ret; @@ -1085,24 +1106,25 @@ static int bch2_snapshot_node_delete(struct btree_t= rans *trans, u32 id) struct btree_iter iter, p_iter =3D {}; struct btree_iter c_iter =3D {}; struct btree_iter tree_iter =3D {}; - struct bkey_s_c_snapshot s; u32 parent_id, child_id; unsigned i; int ret =3D 0; =20 - s =3D bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, i= d), - BTREE_ITER_intent, snapshot); - ret =3D bkey_err(s); + struct bkey_i_snapshot *s =3D + bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), + BTREE_ITER_intent, snapshot); + ret =3D PTR_ERR_OR_ZERO(s); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, "missing snapshot %u", id); =20 if (ret) goto err; =20 - BUG_ON(s.v->children[1]); + BUG_ON(BCH_SNAPSHOT_DELETED(&s->v)); + BUG_ON(s->v.children[1]); =20 - parent_id =3D le32_to_cpu(s.v->parent); - child_id =3D le32_to_cpu(s.v->children[0]); + parent_id =3D le32_to_cpu(s->v.parent); + child_id =3D le32_to_cpu(s->v.children[0]); =20 if (parent_id) { struct bkey_i_snapshot *parent; @@ -1160,24 +1182,38 @@ static int bch2_snapshot_node_delete(struct btree_t= rans *trans, u32 id) */ struct bkey_i_snapshot_tree *s_t; 
=20 - BUG_ON(s.v->children[1]); + BUG_ON(s->v.children[1]); =20 s_t =3D bch2_bkey_get_mut_typed(trans, &tree_iter, - BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), + BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)), 0, snapshot_tree); ret =3D PTR_ERR_OR_ZERO(s_t); if (ret) goto err; =20 - if (s.v->children[0]) { - s_t->v.root_snapshot =3D s.v->children[0]; + if (s->v.children[0]) { + s_t->v.root_snapshot =3D s->v.children[0]; } else { s_t->k.type =3D KEY_TYPE_deleted; set_bkey_val_u64s(&s_t->k, 0); } } =20 - ret =3D bch2_btree_delete_at(trans, &iter, 0); + if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_= deletion_v2)) { + SET_BCH_SNAPSHOT_DELETED(&s->v, true); + s->v.parent =3D 0; + s->v.children[0] =3D 0; + s->v.children[1] =3D 0; + s->v.subvol =3D 0; + s->v.tree =3D 0; + s->v.depth =3D 0; + s->v.skip[0] =3D 0; + s->v.skip[1] =3D 0; + s->v.skip[2] =3D 0; + } else { + s->k.type =3D KEY_TYPE_deleted; + set_bkey_val_u64s(&s->k, 0); + } err: bch2_trans_iter_exit(trans, &tree_iter); bch2_trans_iter_exit(trans, &p_iter); @@ -1468,6 +1504,9 @@ static int check_should_delete_snapshot(struct btree_= trans *trans, struct bkey_s if (BCH_SNAPSHOT_SUBVOL(s.v)) return 0; =20 + if (BCH_SNAPSHOT_DELETED(s.v)) + return 0; + for (unsigned i =3D 0; i < 2; i++) { u32 child =3D le32_to_cpu(s.v->children[i]); =20 @@ -1524,6 +1563,9 @@ static int bch2_fix_child_of_deleted_snapshot(struct = btree_trans *trans, struct bkey_i_snapshot *s; int ret; =20 + if (!bch2_snapshot_exists(c, k.k->p.offset)) + return 0; + if (k.k->type !=3D KEY_TYPE_snapshot) return 0; =20 diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 24a451bb7024..69c484b77729 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -120,21 +120,26 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c= , u32 id) return id; } =20 -static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id) +static inline enum snapshot_id_state 
__bch2_snapshot_id_state(struct bch_f= s *c, u32 id) { const struct snapshot_t *s =3D snapshot_t(c, id); - return s ? s->live : 0; + return s ? s->state : SNAPSHOT_ID_empty; } =20 -static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) +static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs = *c, u32 id) { rcu_read_lock(); - bool ret =3D __bch2_snapshot_exists(c, id); + enum snapshot_id_state ret =3D __bch2_snapshot_id_state(c, id); rcu_read_unlock(); =20 return ret; } =20 +static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) +{ + return bch2_snapshot_id_state(c, id) =3D=3D SNAPSHOT_ID_live; +} + static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) { rcu_read_lock(); @@ -241,7 +246,17 @@ int bch2_snapshot_node_create(struct btree_trans *, u3= 2, int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); int bch2_reconstruct_snapshots(struct bch_fs *); -int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *,= struct bkey_s_c); + +int __bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter = *, struct bkey_s_c); + +static inline int bch2_check_key_has_snapshot(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + return likely(bch2_snapshot_exists(trans->c, k.k->p.snapshot)) + ? 
0 + : __bch2_check_key_has_snapshot(trans, iter, k); +} =20 int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); =20 diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h index 685a9fe209ab..9bccae1f3590 100644 --- a/fs/bcachefs/snapshot_format.h +++ b/fs/bcachefs/snapshot_format.h @@ -16,9 +16,9 @@ struct bch_snapshot { }; =20 LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1) - /* True if a subvolume points to this snapshot node: */ LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) +LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3) =20 /* * Snapshot trees: diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h index 39fb47f43183..a64f4b942655 100644 --- a/fs/bcachefs/snapshot_types.h +++ b/fs/bcachefs/snapshot_types.h @@ -3,8 +3,38 @@ #define _BCACHEFS_SNAPSHOT_TYPES_H =20 #include "bbpos_types.h" +#include "darray.h" #include "subvolume_types.h" =20 +typedef DARRAY(u32) snapshot_id_list; + +#define IS_ANCESTOR_BITMAP 128 + +struct snapshot_t { + enum snapshot_id_state { + SNAPSHOT_ID_empty, + SNAPSHOT_ID_live, + SNAPSHOT_ID_deleted, + } state; + u32 parent; + u32 skip[3]; + u32 depth; + u32 children[2]; + u32 subvol; /* Nonzero only if a subvolume points to this node: */ + u32 tree; + unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)]; +}; + +struct snapshot_table { + struct rcu_head rcu; + size_t nr; +#ifndef RUST_BINDGEN + DECLARE_FLEX_ARRAY(struct snapshot_t, s); +#else + struct snapshot_t s[0]; +#endif +}; + struct snapshot_interior_delete { u32 id; u32 live_child; diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index 1549d6daf7af..9d634b906dcd 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -2,33 +2,6 @@ #ifndef _BCACHEFS_SUBVOLUME_TYPES_H #define _BCACHEFS_SUBVOLUME_TYPES_H =20 -#include "darray.h" - -typedef DARRAY(u32) snapshot_id_list; - -#define IS_ANCESTOR_BITMAP 128 - 
-struct snapshot_t { - bool live; - u32 parent; - u32 skip[3]; - u32 depth; - u32 children[2]; - u32 subvol; /* Nonzero only if a subvolume points to this node: */ - u32 tree; - unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)]; -}; - -struct snapshot_table { - struct rcu_head rcu; - size_t nr; -#ifndef RUST_BINDGEN - DECLARE_FLEX_ARRAY(struct snapshot_t, s); -#else - struct snapshot_t s[0]; -#endif -}; - typedef struct { /* we can't have padding in this struct: */ u64 subvol; --=20 2.49.0