fs/f2fs/data.c | 5 +++++ fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- fs/f2fs/inode.c | 19 +++++++++++++++++- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 2 ++ 7 files changed, 110 insertions(+), 6 deletions(-)
From: Daeho Jeong <daehojeong@google.com>
F2FS should understand how the device aliasing file works and support
deleting the file after use. A device aliasing file can be created by
the mkfs.f2fs tool and it can map the whole device with an extent, not
using node blocks. The file space should be pinned and is normally used
for read-only purposes.
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Chao Yu <chao@kernel.org>
---
v4: added file pinning check in sanity check
v3: merged Chao's extent cache sanity check.
prevented device aliasing support with noextent mount option
v2: changed the position of f2fs_destroy_extent_tree() only for device
aliasing files
---
fs/f2fs/data.c | 5 +++++
fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++-
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++----
fs/f2fs/inode.c | 19 +++++++++++++++++-
fs/f2fs/super.c | 4 ++++
fs/f2fs/sysfs.c | 2 ++
7 files changed, 110 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b94cf6eea2f9..385b46e62ede 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
if (!f2fs_lookup_read_extent_cache_block(inode, index,
&dn.data_blkaddr)) {
+ if (IS_DEVICE_ALIASING(inode)) {
+ err = -ENODATA;
+ goto out;
+ }
+
if (locked) {
err = f2fs_reserve_block(&dn, index);
goto out;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 62ac440d9416..019c1f7b7fa5 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
struct extent_info ei;
+ int devi;
get_read_extent_info(&ei, i_ext);
@@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
ei.blk, ei.fofs, ei.len);
return false;
}
- return true;
+
+ if (!IS_DEVICE_ALIASING(inode))
+ return true;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++) {
+ if (FDEV(devi).start_blk != ei.blk ||
+ FDEV(devi).end_blk != ei.blk + ei.len - 1)
+ continue;
+
+ if (devi == 0) {
+ f2fs_warn(sbi,
+ "%s: inode (ino=%lx) is an alias of meta device",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (bdev_is_zoned(FDEV(devi).bdev)) {
+ f2fs_warn(sbi,
+ "%s: device alias inode (ino=%lx)'s extent info "
+ "[%u, %u, %u] maps to zoned block device",
+ __func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
+ return false;
+ }
+ return true;
+ }
+
+ f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info "
+ "[%u, %u, %u] is inconsistent w/ any devices",
+ __func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
+ return false;
}
static void __set_extent_info(struct extent_info *ei,
@@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
+ if (IS_DEVICE_ALIASING(inode) && type == EX_READ)
+ return true;
+
/*
* for recovered files during mount do not create extents
* if shrinker is not registered.
@@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
if (atomic_read(&et->node_cnt) || !ei.len)
goto skip;
+ if (IS_DEVICE_ALIASING(inode)) {
+ et->largest = ei;
+ goto skip;
+ }
+
en = __attach_extent_node(sbi, et, &ei, NULL,
&et->root.rb_root.rb_node, true);
if (en) {
@@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
+ if (IS_DEVICE_ALIASING(inode)) {
+ ret = false;
+ goto out;
+ }
+
en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
if (!en)
goto out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 33f5449dc22d..b6ba22a1da47 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -213,6 +213,7 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_CASEFOLD 0x00001000
#define F2FS_FEATURE_COMPRESSION 0x00002000
#define F2FS_FEATURE_RO 0x00004000
+#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000
#define __F2FS_HAS_FEATURE(raw_super, mask) \
((raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */
+#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */
#define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL)
@@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
/* Flags that are appropriate for non-directories/regular files. */
#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
+#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL)
+
static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
{
if (S_ISDIR(mode))
@@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
F2FS_FEATURE_FUNCS(compression, COMPRESSION);
F2FS_FEATURE_FUNCS(readonly, RO);
+F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS);
#ifdef CONFIG_BLK_DEV_ZONED
static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 99903eafa7fe..f2d2d84d025b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
trace_f2fs_truncate_blocks_enter(inode, from);
+ if (IS_DEVICE_ALIASING(inode) && from) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
if (free_from >= max_file_blocks(inode))
@@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
goto out;
}
+ if (IS_DEVICE_ALIASING(inode)) {
+ struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
+ struct extent_info ei = et->largest;
+ unsigned int i;
+
+ for (i = 0; i < ei.len; i++)
+ f2fs_invalidate_blocks(sbi, ei.blk + i);
+
+ dec_valid_block_count(sbi, inode, ei.len);
+ f2fs_update_time(sbi, REQ_TIME);
+
+ f2fs_put_page(ipage, 1);
+ goto out;
+ }
+
if (f2fs_has_inline_data(inode)) {
f2fs_truncate_inline_inode(inode, ipage, from);
f2fs_put_page(ipage, 1);
@@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
/* lastly zero out the first data page */
if (!err)
err = truncate_partial_data_page(inode, from, truncate_page);
-
+out_err:
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
}
@@ -992,7 +1012,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return -EPERM;
if ((attr->ia_valid & ATTR_SIZE)) {
- if (!f2fs_is_compress_backend_ready(inode))
+ if (!f2fs_is_compress_backend_ready(inode) ||
+ IS_DEVICE_ALIASING(inode))
return -EOPNOTSUPP;
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
!IS_ALIGNED(attr->ia_size,
@@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode,
return -EIO;
if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
return -ENOSPC;
- if (!f2fs_is_compress_backend_ready(inode))
+ if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
return -EOPNOTSUPP;
/* f2fs only support ->fallocate for regular file */
@@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ if (IS_DEVICE_ALIASING(inode))
+ return -EINVAL;
+
if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
__func__, inode->i_ino, fi->i_gc_failures);
@@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
+ if (!pin && IS_DEVICE_ALIASING(inode))
+ return -EOPNOTSUPP;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
else
return 0;
- map.m_may_create = true;
+ if (!IS_DEVICE_ALIASING(inode))
+ map.m_may_create = true;
if (dio) {
map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
inode->i_write_hint);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 1ed86df343a5..e2d30fc79644 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
+ if (fi->i_flags & F2FS_DEVICE_ALIAS_FL) {
+ if (!f2fs_sb_has_device_alias(sbi)) {
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+ if (!f2fs_is_pinned_file(inode)) {
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned",
+ __func__, inode->i_ino);
+ return false;
+ }
+ }
+
return true;
}
@@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_bug_on(sbi, get_dirty_pages(inode));
f2fs_remove_dirty_inode(inode);
- f2fs_destroy_extent_tree(inode);
+ if (!IS_DEVICE_ALIASING(inode))
+ f2fs_destroy_extent_tree(inode);
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
@@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode)
goto retry;
}
+ if (IS_DEVICE_ALIASING(inode))
+ f2fs_destroy_extent_tree(inode);
+
if (err) {
f2fs_update_inode_page(inode);
if (dquot_initialize_needed(inode))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc2c586c7619..95097498b544 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
set_opt(sbi, READ_EXTENT_CACHE);
break;
case Opt_noextent_cache:
+ if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) {
+ f2fs_err(sbi, "device aliasing requires extent cache");
+ return -EINVAL;
+ }
clear_opt(sbi, READ_EXTENT_CACHE);
break;
case Opt_noinline_data:
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index c56e8c873935..e51304bc65ea 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM);
F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD);
F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION);
F2FS_SB_FEATURE_RO_ATTR(readonly, RO);
+F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS);
static struct attribute *f2fs_sb_feat_attrs[] = {
ATTR_LIST(sb_encryption),
@@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = {
ATTR_LIST(sb_casefold),
ATTR_LIST(sb_compression),
ATTR_LIST(sb_readonly),
+ ATTR_LIST(sb_device_alias),
NULL,
};
ATTRIBUTE_GROUPS(f2fs_sb_feat);
--
2.47.0.rc0.187.ge670bccf7e-goog
On 2024/10/9 2:05, Daeho Jeong wrote: > From: Daeho Jeong <daehojeong@google.com> > > F2FS should understand how the device aliasing file works and support > deleting the file after use. A device aliasing file can be created by > mkfs.f2fs tool and it can map the whole device with an extrent, not > using node blocks. The file space should be pinned and normally used for > read-only usages. > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > Signed-off-by: Chao Yu <chao@kernel.org> > --- > v4: added file pinning check in sanity check > v3: merged Chao's extent cache sanity check. > prevented device aliasing support with noextent mount option > v2: changed the position of f2fs_destroy_extent_tree() only for device > aliasing files > --- > fs/f2fs/data.c | 5 +++++ > fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- > fs/f2fs/f2fs.h | 5 +++++ > fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- > fs/f2fs/inode.c | 19 +++++++++++++++++- > fs/f2fs/super.c | 4 ++++ > fs/f2fs/sysfs.c | 2 ++ > 7 files changed, 110 insertions(+), 6 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index b94cf6eea2f9..385b46e62ede 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > &dn.data_blkaddr)) { > + if (IS_DEVICE_ALIASING(inode)) { > + err = -ENODATA; > + goto out; > + } > + > if (locked) { > err = f2fs_reserve_block(&dn, index); > goto out; > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c > index 62ac440d9416..019c1f7b7fa5 100644 > --- a/fs/f2fs/extent_cache.c > +++ b/fs/f2fs/extent_cache.c > @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; > struct extent_info ei; > + int devi; > > get_read_extent_info(&ei, i_ext); > > @@ -38,7 +39,36 @@ bool 
sanity_check_extent_cache(struct inode *inode, struct page *ipage) > ei.blk, ei.fofs, ei.len); > return false; > } > - return true; > + > + if (!IS_DEVICE_ALIASING(inode)) > + return true; > + > + for (devi = 0; devi < sbi->s_ndevs; devi++) { > + if (FDEV(devi).start_blk != ei.blk || > + FDEV(devi).end_blk != ei.blk + ei.len - 1) > + continue; > + > + if (devi == 0) { > + f2fs_warn(sbi, > + "%s: inode (ino=%lx) is an alias of meta device", > + __func__, inode->i_ino); > + return false; > + } > + > + if (bdev_is_zoned(FDEV(devi).bdev)) { > + f2fs_warn(sbi, > + "%s: device alias inode (ino=%lx)'s extent info " > + "[%u, %u, %u] maps to zoned block device", > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > + return false; > + } > + return true; > + } > + > + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " > + "[%u, %u, %u] is inconsistent w/ any devices", > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > + return false; > } > > static void __set_extent_info(struct extent_info *ei, > @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) > > static bool __may_extent_tree(struct inode *inode, enum extent_type type) > { > + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) > + return true; > + > /* > * for recovered files during mount do not create extents > * if shrinker is not registered. 
> @@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) > if (atomic_read(&et->node_cnt) || !ei.len) > goto skip; > > + if (IS_DEVICE_ALIASING(inode)) { > + et->largest = ei; > + goto skip; > + } > + > en = __attach_extent_node(sbi, et, &ei, NULL, > &et->root.rb_root.rb_node, true); > if (en) { > @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, > goto out; > } > > + if (IS_DEVICE_ALIASING(inode)) { > + ret = false; > + goto out; > + } > + > en = __lookup_extent_node(&et->root, et->cached_en, pgofs); > if (!en) > goto out; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 33f5449dc22d..b6ba22a1da47 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -213,6 +213,7 @@ struct f2fs_mount_info { > #define F2FS_FEATURE_CASEFOLD 0x00001000 > #define F2FS_FEATURE_COMPRESSION 0x00002000 > #define F2FS_FEATURE_RO 0x00004000 > +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 > > #define __F2FS_HAS_FEATURE(raw_super, mask) \ > ((raw_super->feature & cpu_to_le32(mask)) != 0) > @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ > #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ > +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ Is there any way to know which inode is device-alias one? maybe we can export this flag to userspace via .fileattr_get? or via newly introduced ioctl interface? > > #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) > > @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > /* Flags that are appropriate for non-directories/regular files. 
*/ > #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) > > +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) > + > static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) > { > if (S_ISDIR(mode)) > @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); > F2FS_FEATURE_FUNCS(casefold, CASEFOLD); > F2FS_FEATURE_FUNCS(compression, COMPRESSION); > F2FS_FEATURE_FUNCS(readonly, RO); > +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); > > #ifdef CONFIG_BLK_DEV_ZONED > static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 99903eafa7fe..f2d2d84d025b 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > trace_f2fs_truncate_blocks_enter(inode, from); > > + if (IS_DEVICE_ALIASING(inode) && from) { > + err = -EINVAL; > + goto out_err; > + } > + > free_from = (pgoff_t)F2FS_BLK_ALIGN(from); > > if (free_from >= max_file_blocks(inode)) > @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > goto out; > } > > + if (IS_DEVICE_ALIASING(inode)) { > + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; > + struct extent_info ei = et->largest; > + unsigned int i; > + > + for (i = 0; i < ei.len; i++) > + f2fs_invalidate_blocks(sbi, ei.blk + i); > + > + dec_valid_block_count(sbi, inode, ei.len); > + f2fs_update_time(sbi, REQ_TIME); > + > + f2fs_put_page(ipage, 1); > + goto out; > + } > + > if (f2fs_has_inline_data(inode)) { > f2fs_truncate_inline_inode(inode, ipage, from); > f2fs_put_page(ipage, 1); > @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > /* lastly zero out the first data page */ > if (!err) > err = truncate_partial_data_page(inode, from, truncate_page); > - > +out_err: > trace_f2fs_truncate_blocks_exit(inode, err); > return err; > } > @@ -992,7 +1012,8 @@ int 
f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, > return -EPERM; > > if ((attr->ia_valid & ATTR_SIZE)) { > - if (!f2fs_is_compress_backend_ready(inode)) > + if (!f2fs_is_compress_backend_ready(inode) || > + IS_DEVICE_ALIASING(inode)) > return -EOPNOTSUPP; > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && > !IS_ALIGNED(attr->ia_size, > @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, > return -EIO; > if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) > return -ENOSPC; > - if (!f2fs_is_compress_backend_ready(inode)) > + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) > return -EOPNOTSUPP; > > /* f2fs only support ->fallocate for regular file */ > @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > struct f2fs_inode_info *fi = F2FS_I(inode); > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > + if (IS_DEVICE_ALIASING(inode)) > + return -EINVAL; > + > if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { > f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", > __func__, inode->i_ino, fi->i_gc_failures); > @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > if (f2fs_readonly(sbi->sb)) > return -EROFS; > > + if (!pin && IS_DEVICE_ALIASING(inode)) > + return -EOPNOTSUPP; > + > ret = mnt_want_write_file(filp); > if (ret) > return ret; > @@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > else > return 0; > > - map.m_may_create = true; > + if (!IS_DEVICE_ALIASING(inode)) > + map.m_may_create = true; > if (dio) { > map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, > inode->i_write_hint); > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c > index 1ed86df343a5..e2d30fc79644 100644 > --- a/fs/f2fs/inode.c > +++ b/fs/f2fs/inode.c > @@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) > return false; > } > > + if (fi->i_flags & 
F2FS_DEVICE_ALIAS_FL) { Trivial cleanup. IS_DEVICE_ALIASING(inode) Thanks, > + if (!f2fs_sb_has_device_alias(sbi)) { > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", > + __func__, inode->i_ino); > + return false; > + } > + if (!f2fs_is_pinned_file(inode)) { > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", > + __func__, inode->i_ino); > + return false; > + } > + } > + > return true; > } > > @@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode) > f2fs_bug_on(sbi, get_dirty_pages(inode)); > f2fs_remove_dirty_inode(inode); > > - f2fs_destroy_extent_tree(inode); > + if (!IS_DEVICE_ALIASING(inode)) > + f2fs_destroy_extent_tree(inode); > > if (inode->i_nlink || is_bad_inode(inode)) > goto no_delete; > @@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode) > goto retry; > } > > + if (IS_DEVICE_ALIASING(inode)) > + f2fs_destroy_extent_tree(inode); > + > if (err) { > f2fs_update_inode_page(inode); > if (dquot_initialize_needed(inode)) > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > index fc2c586c7619..95097498b544 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > set_opt(sbi, READ_EXTENT_CACHE); > break; > case Opt_noextent_cache: > + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { > + f2fs_err(sbi, "device aliasing requires extent cache"); > + return -EINVAL; > + } > clear_opt(sbi, READ_EXTENT_CACHE); > break; > case Opt_noinline_data: > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > index c56e8c873935..e51304bc65ea 100644 > --- a/fs/f2fs/sysfs.c > +++ b/fs/f2fs/sysfs.c > @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); > F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); > F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); > F2FS_SB_FEATURE_RO_ATTR(readonly, RO); > +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); > > static struct attribute 
*f2fs_sb_feat_attrs[] = { > ATTR_LIST(sb_encryption), > @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { > ATTR_LIST(sb_casefold), > ATTR_LIST(sb_compression), > ATTR_LIST(sb_readonly), > + ATTR_LIST(sb_device_alias), > NULL, > }; > ATTRIBUTE_GROUPS(f2fs_sb_feat);
On Tue, Oct 8, 2024 at 7:32 PM Chao Yu <chao@kernel.org> wrote: > > On 2024/10/9 2:05, Daeho Jeong wrote: > > From: Daeho Jeong <daehojeong@google.com> > > > > F2FS should understand how the device aliasing file works and support > > deleting the file after use. A device aliasing file can be created by > > mkfs.f2fs tool and it can map the whole device with an extrent, not > > using node blocks. The file space should be pinned and normally used for > > read-only usages. > > > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > > Signed-off-by: Chao Yu <chao@kernel.org> > > --- > > v4: added file pinning check in sanity check > > v3: merged Chao's extent cache sanity check. > > prevented device aliasing support with noextent mount option > > v2: changed the position of f2fs_destroy_extent_tree() only for device > > aliasing files > > --- > > fs/f2fs/data.c | 5 +++++ > > fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- > > fs/f2fs/f2fs.h | 5 +++++ > > fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- > > fs/f2fs/inode.c | 19 +++++++++++++++++- > > fs/f2fs/super.c | 4 ++++ > > fs/f2fs/sysfs.c | 2 ++ > > 7 files changed, 110 insertions(+), 6 deletions(-) > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > > index b94cf6eea2f9..385b46e62ede 100644 > > --- a/fs/f2fs/data.c > > +++ b/fs/f2fs/data.c > > @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > > &dn.data_blkaddr)) { > > + if (IS_DEVICE_ALIASING(inode)) { > > + err = -ENODATA; > > + goto out; > > + } > > + > > if (locked) { > > err = f2fs_reserve_block(&dn, index); > > goto out; > > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c > > index 62ac440d9416..019c1f7b7fa5 100644 > > --- a/fs/f2fs/extent_cache.c > > +++ b/fs/f2fs/extent_cache.c > > @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 
> > struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; > > struct extent_info ei; > > + int devi; > > > > get_read_extent_info(&ei, i_ext); > > > > @@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > > ei.blk, ei.fofs, ei.len); > > return false; > > } > > - return true; > > + > > + if (!IS_DEVICE_ALIASING(inode)) > > + return true; > > + > > + for (devi = 0; devi < sbi->s_ndevs; devi++) { > > + if (FDEV(devi).start_blk != ei.blk || > > + FDEV(devi).end_blk != ei.blk + ei.len - 1) > > + continue; > > + > > + if (devi == 0) { > > + f2fs_warn(sbi, > > + "%s: inode (ino=%lx) is an alias of meta device", > > + __func__, inode->i_ino); > > + return false; > > + } > > + > > + if (bdev_is_zoned(FDEV(devi).bdev)) { > > + f2fs_warn(sbi, > > + "%s: device alias inode (ino=%lx)'s extent info " > > + "[%u, %u, %u] maps to zoned block device", > > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > > + return false; > > + } > > + return true; > > + } > > + > > + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " > > + "[%u, %u, %u] is inconsistent w/ any devices", > > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > > + return false; > > } > > > > static void __set_extent_info(struct extent_info *ei, > > @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) > > > > static bool __may_extent_tree(struct inode *inode, enum extent_type type) > > { > > + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) > > + return true; > > + > > /* > > * for recovered files during mount do not create extents > > * if shrinker is not registered. 
> > @@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) > > if (atomic_read(&et->node_cnt) || !ei.len) > > goto skip; > > > > + if (IS_DEVICE_ALIASING(inode)) { > > + et->largest = ei; > > + goto skip; > > + } > > + > > en = __attach_extent_node(sbi, et, &ei, NULL, > > &et->root.rb_root.rb_node, true); > > if (en) { > > @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, > > goto out; > > } > > > > + if (IS_DEVICE_ALIASING(inode)) { > > + ret = false; > > + goto out; > > + } > > + > > en = __lookup_extent_node(&et->root, et->cached_en, pgofs); > > if (!en) > > goto out; > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index 33f5449dc22d..b6ba22a1da47 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -213,6 +213,7 @@ struct f2fs_mount_info { > > #define F2FS_FEATURE_CASEFOLD 0x00001000 > > #define F2FS_FEATURE_COMPRESSION 0x00002000 > > #define F2FS_FEATURE_RO 0x00004000 > > +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 > > > > #define __F2FS_HAS_FEATURE(raw_super, mask) \ > > ((raw_super->feature & cpu_to_le32(mask)) != 0) > > @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > > #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ > > #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > > #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ > > +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ > > Is there any way to know which inode is device-alias one? maybe > we can export this flag to userspace via .fileattr_get? or via > newly introduced ioctl interface? Right, we need to add a new ioctl interface to get the flag. Let me take care of this. 
Thanks, > > > > > #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) > > > > @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > > /* Flags that are appropriate for non-directories/regular files. */ > > #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) > > > > +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) > > + > > static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) > > { > > if (S_ISDIR(mode)) > > @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); > > F2FS_FEATURE_FUNCS(casefold, CASEFOLD); > > F2FS_FEATURE_FUNCS(compression, COMPRESSION); > > F2FS_FEATURE_FUNCS(readonly, RO); > > +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); > > > > #ifdef CONFIG_BLK_DEV_ZONED > > static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > > index 99903eafa7fe..f2d2d84d025b 100644 > > --- a/fs/f2fs/file.c > > +++ b/fs/f2fs/file.c > > @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > > > trace_f2fs_truncate_blocks_enter(inode, from); > > > > + if (IS_DEVICE_ALIASING(inode) && from) { > > + err = -EINVAL; > > + goto out_err; > > + } > > + > > free_from = (pgoff_t)F2FS_BLK_ALIGN(from); > > > > if (free_from >= max_file_blocks(inode)) > > @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > goto out; > > } > > > > + if (IS_DEVICE_ALIASING(inode)) { > > + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; > > + struct extent_info ei = et->largest; > > + unsigned int i; > > + > > + for (i = 0; i < ei.len; i++) > > + f2fs_invalidate_blocks(sbi, ei.blk + i); > > + > > + dec_valid_block_count(sbi, inode, ei.len); > > + f2fs_update_time(sbi, REQ_TIME); > > + > > + f2fs_put_page(ipage, 1); > > + goto out; > > + } > > + > > if (f2fs_has_inline_data(inode)) { > > f2fs_truncate_inline_inode(inode, ipage, 
from); > > f2fs_put_page(ipage, 1); > > @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > /* lastly zero out the first data page */ > > if (!err) > > err = truncate_partial_data_page(inode, from, truncate_page); > > - > > +out_err: > > trace_f2fs_truncate_blocks_exit(inode, err); > > return err; > > } > > @@ -992,7 +1012,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, > > return -EPERM; > > > > if ((attr->ia_valid & ATTR_SIZE)) { > > - if (!f2fs_is_compress_backend_ready(inode)) > > + if (!f2fs_is_compress_backend_ready(inode) || > > + IS_DEVICE_ALIASING(inode)) > > return -EOPNOTSUPP; > > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && > > !IS_ALIGNED(attr->ia_size, > > @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, > > return -EIO; > > if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) > > return -ENOSPC; > > - if (!f2fs_is_compress_backend_ready(inode)) > > + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) > > return -EOPNOTSUPP; > > > > /* f2fs only support ->fallocate for regular file */ > > @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > > struct f2fs_inode_info *fi = F2FS_I(inode); > > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > > > + if (IS_DEVICE_ALIASING(inode)) > > + return -EINVAL; > > + > > if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { > > f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", > > __func__, inode->i_ino, fi->i_gc_failures); > > @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > > if (f2fs_readonly(sbi->sb)) > > return -EROFS; > > > > + if (!pin && IS_DEVICE_ALIASING(inode)) > > + return -EOPNOTSUPP; > > + > > ret = mnt_want_write_file(filp); > > if (ret) > > return ret; > > @@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > > else > > return 0; > > > > - 
map.m_may_create = true; > > + if (!IS_DEVICE_ALIASING(inode)) > > + map.m_may_create = true; > > if (dio) { > > map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, > > inode->i_write_hint); > > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c > > index 1ed86df343a5..e2d30fc79644 100644 > > --- a/fs/f2fs/inode.c > > +++ b/fs/f2fs/inode.c > > @@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) > > return false; > > } > > > > + if (fi->i_flags & F2FS_DEVICE_ALIAS_FL) { > > Trivial cleanup. > > IS_DEVICE_ALIASING(inode) > > Thanks, > > > + if (!f2fs_sb_has_device_alias(sbi)) { > > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", > > + __func__, inode->i_ino); > > + return false; > > + } > > + if (!f2fs_is_pinned_file(inode)) { > > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", > > + __func__, inode->i_ino); > > + return false; > > + } > > + } > > + > > return true; > > } > > > > @@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode) > > f2fs_bug_on(sbi, get_dirty_pages(inode)); > > f2fs_remove_dirty_inode(inode); > > > > - f2fs_destroy_extent_tree(inode); > > + if (!IS_DEVICE_ALIASING(inode)) > > + f2fs_destroy_extent_tree(inode); > > > > if (inode->i_nlink || is_bad_inode(inode)) > > goto no_delete; > > @@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode) > > goto retry; > > } > > > > + if (IS_DEVICE_ALIASING(inode)) > > + f2fs_destroy_extent_tree(inode); > > + > > if (err) { > > f2fs_update_inode_page(inode); > > if (dquot_initialize_needed(inode)) > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > > index fc2c586c7619..95097498b544 100644 > > --- a/fs/f2fs/super.c > > +++ b/fs/f2fs/super.c > > @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > > set_opt(sbi, READ_EXTENT_CACHE); > > break; > > case Opt_noextent_cache: > > + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { > > + 
f2fs_err(sbi, "device aliasing requires extent cache"); > > + return -EINVAL; > > + } > > clear_opt(sbi, READ_EXTENT_CACHE); > > break; > > case Opt_noinline_data: > > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > > index c56e8c873935..e51304bc65ea 100644 > > --- a/fs/f2fs/sysfs.c > > +++ b/fs/f2fs/sysfs.c > > @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); > > F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); > > F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); > > F2FS_SB_FEATURE_RO_ATTR(readonly, RO); > > +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); > > > > static struct attribute *f2fs_sb_feat_attrs[] = { > > ATTR_LIST(sb_encryption), > > @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { > > ATTR_LIST(sb_casefold), > > ATTR_LIST(sb_compression), > > ATTR_LIST(sb_readonly), > > + ATTR_LIST(sb_device_alias), > > NULL, > > }; > > ATTRIBUTE_GROUPS(f2fs_sb_feat); >
© 2016 - 2024 Red Hat, Inc.