This patch adds base support for atomic writes via statx getattr.
On bs < ps systems, we can create FS with say bs of 16k. That means
both atomic write min and max unit can be set to 16k for supporting
atomic writes.
Later patches adds support for bigalloc as well so that ext4 can also
support doing atomic writes for bs = ps systems.
Co-developed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
---
fs/ext4/ext4.h | 7 ++++++-
fs/ext4/inode.c | 14 ++++++++++++++
fs/ext4/super.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 44b0d418143c..a41e56c2c628 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1729,6 +1729,10 @@ struct ext4_sb_info {
*/
struct work_struct s_sb_upd_work;
+ /* Atomic write unit values */
+ unsigned int fs_awu_min;
+ unsigned int fs_awu_max;
+
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
@@ -1820,7 +1824,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
*/
enum {
EXT4_MF_MNTDIR_SAMPLED,
- EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
+ EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
+ EXT4_MF_ATOMIC_WRITE /* Supports atomic write */
};
static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 54bdd4884fe6..897c028d5bc9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5578,6 +5578,20 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
}
}
+ if (S_ISREG(inode->i_mode) && (request_mask & STATX_WRITE_ATOMIC)) {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned int awu_min, awu_max;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_ATOMIC_WRITE)) {
+ awu_min = sbi->fs_awu_min;
+ awu_max = sbi->fs_awu_max;
+ } else {
+ awu_min = awu_max = 0;
+ }
+
+ generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
+ }
+
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (flags & EXT4_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 16a4ce704460..f5c075aff060 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4425,6 +4425,37 @@ static int ext4_handle_clustersize(struct super_block *sb)
return 0;
}
+/*
+ * ext4_atomic_write_init: Initializes filesystem min & max atomic write units.
+ * @sb: super block
+ * TODO: Later add support for bigalloc
+ */
+static void ext4_atomic_write_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct block_device *bdev = sb->s_bdev;
+
+ if (!bdev_can_atomic_write(bdev))
+ return;
+
+ if (!ext4_has_feature_extents(sb))
+ return;
+
+ sbi->fs_awu_min = max(sb->s_blocksize,
+ bdev_atomic_write_unit_min_bytes(bdev));
+ sbi->fs_awu_max = min(sb->s_blocksize,
+ bdev_atomic_write_unit_max_bytes(bdev));
+ if (sbi->fs_awu_min && sbi->fs_awu_max &&
+ sbi->fs_awu_min <= sbi->fs_awu_max) {
+ ext4_set_mount_flag(sb, EXT4_MF_ATOMIC_WRITE);
+ ext4_msg(sb, KERN_NOTICE, "Supports atomic writes awu_min: %u, awu_max: %u",
+ sbi->fs_awu_min, sbi->fs_awu_max);
+ } else {
+ sbi->fs_awu_min = 0;
+ sbi->fs_awu_max = 0;
+ }
+}
+
static void ext4_fast_commit_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -5336,6 +5367,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
spin_lock_init(&sbi->s_bdev_wb_lock);
+ ext4_atomic_write_init(sb);
ext4_fast_commit_init(sb);
sb->s_root = NULL;
--
2.46.0
On 25/10/2024 04:45, Ritesh Harjani (IBM) wrote: > This patch adds base support for atomic writes via statx getattr. > On bs < ps systems, we can create FS with say bs of 16k. That means > both atomic write min and max unit can be set to 16k for supporting > atomic writes. > > Later patches adds support for bigalloc as well so that ext4 can also > support doing atomic writes for bs = ps systems. > > Co-developed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> > Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> > --- > fs/ext4/ext4.h | 7 ++++++- > fs/ext4/inode.c | 14 ++++++++++++++ > fs/ext4/super.c | 32 ++++++++++++++++++++++++++++++++ > 3 files changed, 52 insertions(+), 1 deletion(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 44b0d418143c..a41e56c2c628 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -1729,6 +1729,10 @@ struct ext4_sb_info { > */ > struct work_struct s_sb_upd_work; > > + /* Atomic write unit values */ > + unsigned int fs_awu_min; > + unsigned int fs_awu_max; > + > /* Ext4 fast commit sub transaction ID */ > atomic_t s_fc_subtid; > > @@ -1820,7 +1824,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) > */ > enum { > EXT4_MF_MNTDIR_SAMPLED, > - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ > + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ > + EXT4_MF_ATOMIC_WRITE /* Supports atomic write */ Does this flag really buy us much? > }; > > static inline void ext4_set_mount_flag(struct super_block *sb, int bit) > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 54bdd4884fe6..897c028d5bc9 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -5578,6 +5578,20 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, > } > } > > + if (S_ISREG(inode->i_mode) && (request_mask & STATX_WRITE_ATOMIC)) { > + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); > + unsigned int awu_min, awu_max; > + > + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_ATOMIC_WRITE)) { I'd use ext4_inode_can_atomicwrite() here, similar to what is done for xfs > + awu_min = sbi->fs_awu_min; > + awu_max = sbi->fs_awu_max; > + } else { > + awu_min = awu_max = 0; > + } > + > + generic_fill_statx_atomic_writes(stat, awu_min, awu_max); > + } > + > flags = ei->i_flags & EXT4_FL_USER_VISIBLE; > if (flags & EXT4_APPEND_FL) > stat->attributes |= STATX_ATTR_APPEND; > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 16a4ce704460..f5c075aff060 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -4425,6 +4425,37 @@ static int ext4_handle_clustersize(struct super_block *sb) > return 0; > } > > +/*
John Garry <john.g.garry@oracle.com> writes: > On 25/10/2024 04:45, Ritesh Harjani (IBM) wrote: >> This patch adds base support for atomic writes via statx getattr. >> On bs < ps systems, we can create FS with say bs of 16k. That means >> both atomic write min and max unit can be set to 16k for supporting >> atomic writes. >> >> Later patches adds support for bigalloc as well so that ext4 can also >> support doing atomic writes for bs = ps systems. >> >> Co-developed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> >> Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> >> --- >> fs/ext4/ext4.h | 7 ++++++- >> fs/ext4/inode.c | 14 ++++++++++++++ >> fs/ext4/super.c | 32 ++++++++++++++++++++++++++++++++ >> 3 files changed, 52 insertions(+), 1 deletion(-) >> >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h >> index 44b0d418143c..a41e56c2c628 100644 >> --- a/fs/ext4/ext4.h >> +++ b/fs/ext4/ext4.h >> @@ -1729,6 +1729,10 @@ struct ext4_sb_info { >> */ >> struct work_struct s_sb_upd_work; >> >> + /* Atomic write unit values */ >> + unsigned int fs_awu_min; >> + unsigned int fs_awu_max; >> + >> /* Ext4 fast commit sub transaction ID */ >> atomic_t s_fc_subtid; >> >> @@ -1820,7 +1824,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) >> */ >> enum { >> EXT4_MF_MNTDIR_SAMPLED, >> - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ >> + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ >> + EXT4_MF_ATOMIC_WRITE /* Supports atomic write */ > > Does this flag really buy us much? > I felt it is cleaner this way than comparing non-zero values of fs_awu_min and fs_awu_max. Now that you pointed at it - Maybe a question for others who might have the history of which one to use when - or do we think there is a scope of merging the two into just one as a later cleanup? I know that s_mount_flags was added for fastcommit and it needed the state manipulations to be done in atomic way. Similarly s_ext4_flags also was renamed from s_resize_flags for more general purpose use. Both of these looks like could be merged isn't it? >> }; >> >> static inline void ext4_set_mount_flag(struct super_block *sb, int bit) >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c >> index 54bdd4884fe6..897c028d5bc9 100644 >> --- a/fs/ext4/inode.c >> +++ b/fs/ext4/inode.c >> @@ -5578,6 +5578,20 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, >> } >> } >> >> + if (S_ISREG(inode->i_mode) && (request_mask & STATX_WRITE_ATOMIC)) { >> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); >> + unsigned int awu_min, awu_max; >> + >> + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_ATOMIC_WRITE)) { > > I'd use ext4_inode_can_atomicwrite() here, similar to what is done for xfs > Sure since it is inode operation, we can check against ext4_inode_can_atomicwrite(). >> + awu_min = sbi->fs_awu_min; >> + awu_max = sbi->fs_awu_max; >> + } else { >> + awu_min = awu_max = 0; >> + } >> + >> + generic_fill_statx_atomic_writes(stat, awu_min, awu_max); >> + } >> + >> flags = ei->i_flags & EXT4_FL_USER_VISIBLE; >> if (flags & EXT4_APPEND_FL) >> stat->attributes |= STATX_ATTR_APPEND; >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c >> index 16a4ce704460..f5c075aff060 100644 >> --- a/fs/ext4/super.c >> +++ b/fs/ext4/super.c >> @@ -4425,6 +4425,37 @@ static int ext4_handle_clustersize(struct super_block *sb) >> return 0; >> } >> >> +/*
On Fri, Oct 25, 2024 at 03:38:03PM +0530, Ritesh Harjani wrote: > John Garry <john.g.garry@oracle.com> writes: > > > On 25/10/2024 04:45, Ritesh Harjani (IBM) wrote: > >> This patch adds base support for atomic writes via statx getattr. > >> On bs < ps systems, we can create FS with say bs of 16k. That means > >> both atomic write min and max unit can be set to 16k for supporting > >> atomic writes. > >> > >> Later patches adds support for bigalloc as well so that ext4 can also > >> support doing atomic writes for bs = ps systems. > >> > >> Co-developed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> > >> Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> > >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> > >> --- > >> fs/ext4/ext4.h | 7 ++++++- > >> fs/ext4/inode.c | 14 ++++++++++++++ > >> fs/ext4/super.c | 32 ++++++++++++++++++++++++++++++++ > >> 3 files changed, 52 insertions(+), 1 deletion(-) > >> > >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > >> index 44b0d418143c..a41e56c2c628 100644 > >> --- a/fs/ext4/ext4.h > >> +++ b/fs/ext4/ext4.h > >> @@ -1729,6 +1729,10 @@ struct ext4_sb_info { > >> */ > >> struct work_struct s_sb_upd_work; > >> > >> + /* Atomic write unit values */ > >> + unsigned int fs_awu_min; > >> + unsigned int fs_awu_max; > >> + > >> /* Ext4 fast commit sub transaction ID */ > >> atomic_t s_fc_subtid; > >> > >> @@ -1820,7 +1824,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) > >> */ > >> enum { > >> EXT4_MF_MNTDIR_SAMPLED, > >> - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ > >> + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ > >> + EXT4_MF_ATOMIC_WRITE /* Supports atomic write */ > > > > Does this flag really buy us much? > > > > I felt it is cleaner this way than comparing non-zero values of > fs_awu_min and fs_awu_max. What does it mean when MF_ATOMIC_WRITE is set and fs_awu_* are zero? The awu values don't change at runtime, so I think you can save yourself an atomic test by checking (non-atomically) for awu_min>0. (I don't know anything about the flags, those came after my time iirc.) --D > Now that you pointed at it - Maybe a question for others who might have > the history of which one to use when - or do we think there is a scope > of merging the two into just one as a later cleanup? > > I know that s_mount_flags was added for fastcommit and it needed the > state manipulations to be done in atomic way. Similarly s_ext4_flags > also was renamed from s_resize_flags for more general purpose use. Both > of these looks like could be merged isn't it? > > > > >> }; > >> > >> static inline void ext4_set_mount_flag(struct super_block *sb, int bit) > >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > >> index 54bdd4884fe6..897c028d5bc9 100644 > >> --- a/fs/ext4/inode.c > >> +++ b/fs/ext4/inode.c > >> @@ -5578,6 +5578,20 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, > >> } > >> } > >> > >> + if (S_ISREG(inode->i_mode) && (request_mask & STATX_WRITE_ATOMIC)) { > >> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); > >> + unsigned int awu_min, awu_max; > >> + > >> + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_ATOMIC_WRITE)) { > > > > I'd use ext4_inode_can_atomicwrite() here, similar to what is done for xfs > > > > Sure since it is inode operation, we can check against ext4_inode_can_atomicwrite(). > > > >> + awu_min = sbi->fs_awu_min; > >> + awu_max = sbi->fs_awu_max; > >> + } else { > >> + awu_min = awu_max = 0; > >> + } > >> + > >> + generic_fill_statx_atomic_writes(stat, awu_min, awu_max); > >> + } > >> + > >> flags = ei->i_flags & EXT4_FL_USER_VISIBLE; > >> if (flags & EXT4_APPEND_FL) > >> stat->attributes |= STATX_ATTR_APPEND; > >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c > >> index 16a4ce704460..f5c075aff060 100644 > >> --- a/fs/ext4/super.c > >> +++ b/fs/ext4/super.c > >> @@ -4425,6 +4425,37 @@ static int ext4_handle_clustersize(struct super_block *sb) > >> return 0; > >> } > >> > >> +/* >
"Darrick J. Wong" <djwong@kernel.org> writes: > On Fri, Oct 25, 2024 at 03:38:03PM +0530, Ritesh Harjani wrote: >> John Garry <john.g.garry@oracle.com> writes: >> >> > On 25/10/2024 04:45, Ritesh Harjani (IBM) wrote: >> >> This patch adds base support for atomic writes via statx getattr. >> >> On bs < ps systems, we can create FS with say bs of 16k. That means >> >> both atomic write min and max unit can be set to 16k for supporting >> >> atomic writes. >> >> >> >> Later patches adds support for bigalloc as well so that ext4 can also >> >> support doing atomic writes for bs = ps systems. >> >> >> >> Co-developed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> >> >> Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> >> >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> >> >> --- >> >> fs/ext4/ext4.h | 7 ++++++- >> >> fs/ext4/inode.c | 14 ++++++++++++++ >> >> fs/ext4/super.c | 32 ++++++++++++++++++++++++++++++++ >> >> 3 files changed, 52 insertions(+), 1 deletion(-) >> >> >> >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h >> >> index 44b0d418143c..a41e56c2c628 100644 >> >> --- a/fs/ext4/ext4.h >> >> +++ b/fs/ext4/ext4.h >> >> @@ -1729,6 +1729,10 @@ struct ext4_sb_info { >> >> */ >> >> struct work_struct s_sb_upd_work; >> >> >> >> + /* Atomic write unit values */ >> >> + unsigned int fs_awu_min; >> >> + unsigned int fs_awu_max; >> >> + >> >> /* Ext4 fast commit sub transaction ID */ >> >> atomic_t s_fc_subtid; >> >> >> >> @@ -1820,7 +1824,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) >> >> */ >> >> enum { >> >> EXT4_MF_MNTDIR_SAMPLED, >> >> - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ >> >> + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ >> >> + EXT4_MF_ATOMIC_WRITE /* Supports atomic write */ >> > >> > Does this flag really buy us much? >> > >> >> I felt it is cleaner this way than comparing non-zero values of >> fs_awu_min and fs_awu_max. > > What does it mean when MF_ATOMIC_WRITE is set and fs_awu_* are zero? > The awu values don't change at runtime, so I think you can save yourself > an atomic test by checking (non-atomically) for awu_min>0. Sure. I agree with the reasoning that we can just check for awu_min > 0. I can write an inline helper for this. > > (I don't know anything about the flags, those came after my time iirc.) > Thanks for the review :) -ritesh > --D > >> Now that you pointed at it - Maybe a question for others who might have >> the history of which one to use when - or do we think there is a scope >> of merging the two into just one as a later cleanup? >> >> I know that s_mount_flags was added for fastcommit and it needed the >> state manipulations to be done in atomic way. Similarly s_ext4_flags >> also was renamed from s_resize_flags for more general purpose use. Both >> of these looks like could be merged isn't it? >> >> >> >> >> }; >> >> >> >> static inline void ext4_set_mount_flag(struct super_block *sb, int bit) >> >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c >> >> index 54bdd4884fe6..897c028d5bc9 100644 >> >> --- a/fs/ext4/inode.c >> >> +++ b/fs/ext4/inode.c >> >> @@ -5578,6 +5578,20 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path, >> >> } >> >> } >> >> >> >> + if (S_ISREG(inode->i_mode) && (request_mask & STATX_WRITE_ATOMIC)) { >> >> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); >> >> + unsigned int awu_min, awu_max; >> >> + >> >> + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_ATOMIC_WRITE)) { >> > >> > I'd use ext4_inode_can_atomicwrite() here, similar to what is done for xfs >> > >> >> Sure since it is inode operation, we can check against ext4_inode_can_atomicwrite(). >> >> >> >> + awu_min = sbi->fs_awu_min; >> >> + awu_max = sbi->fs_awu_max; >> >> + } else { >> >> + awu_min = awu_max = 0; >> >> + } >> >> + >> >> + generic_fill_statx_atomic_writes(stat, awu_min, awu_max); >> >> + } >> >> + >> >> flags = ei->i_flags & EXT4_FL_USER_VISIBLE; >> >> if (flags & EXT4_APPEND_FL) >> >> stat->attributes |= STATX_ATTR_APPEND; >> >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c >> >> index 16a4ce704460..f5c075aff060 100644 >> >> --- a/fs/ext4/super.c >> >> +++ b/fs/ext4/super.c >> >> @@ -4425,6 +4425,37 @@ static int ext4_handle_clustersize(struct super_block *sb) >> >> return 0; >> >> } >> >> >> >> +/* >>
© 2016 - 2024 Red Hat, Inc.