For block devices opened with O_DIRECT, use copy_file_range to
issue device copy offload, and fall back to generic_copy_file_range in case
device copy offload capability is absent.
Modify checks to allow bdevs to use copy_file_range.
Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
block/blk-lib.c | 26 ++++++++++++++++++++++++++
block/fops.c | 20 ++++++++++++++++++++
fs/read_write.c | 7 +++++--
include/linux/blkdev.h | 4 ++++
4 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 09e0d5d51d03..7d8e09a99254 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -473,6 +473,32 @@ ssize_t blkdev_copy_offload(
}
EXPORT_SYMBOL_GPL(blkdev_copy_offload);
+/* Copy source offset from source block device to destination block
+ * device. Returns the length of bytes copied.
+ */
+ssize_t blkdev_copy_offload_failfast(
+ struct block_device *bdev_in, loff_t pos_in,
+ struct block_device *bdev_out, loff_t pos_out,
+ size_t len, gfp_t gfp_mask)
+{
+ struct request_queue *in_q = bdev_get_queue(bdev_in);
+ struct request_queue *out_q = bdev_get_queue(bdev_out);
+ ssize_t ret = 0;
+
+ if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len))
+ return 0;
+
+ if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) {
+ ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
+ len, NULL, NULL, gfp_mask);
+ if (ret < 0)
+ return 0;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_copy_offload_failfast);
+
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
struct bio **biop, unsigned flags)
diff --git a/block/fops.c b/block/fops.c
index a286bf3325c5..a1576304f269 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -621,6 +621,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
+static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
+ struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
+ ssize_t comp_len = 0;
+
+ if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
+ (file_out->f_iocb_flags & IOCB_DIRECT))
+ comp_len = blkdev_copy_offload_failfast(in_bdev, pos_in,
+ out_bdev, pos_out, len, GFP_KERNEL);
+ if (comp_len != len)
+ comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
+ file_out, pos_out + comp_len, len - comp_len, flags);
+
+ return comp_len;
+}
+
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
@@ -714,6 +733,7 @@ const struct file_operations def_blk_fops = {
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
+ .copy_file_range = blkdev_copy_file_range,
};
static __init int blkdev_init(void)
diff --git a/fs/read_write.c b/fs/read_write.c
index b07de77ef126..d27148a2543f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
return -EOVERFLOW;
/* Shorten the copy to EOF */
- size_in = i_size_read(inode_in);
+ size_in = i_size_read(file_in->f_mapping->host);
+
if (pos_in >= size_in)
count = 0;
else
@@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
/* Don't copy dirs, pipes, sockets... */
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+
+ if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) &&
+ (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode)))
return -EINVAL;
if (!(file_in->f_mode & FMODE_READ) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c176bf6173c5..850168cad080 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1047,6 +1047,10 @@ ssize_t blkdev_copy_offload(
struct block_device *bdev_in, loff_t pos_in,
struct block_device *bdev_out, loff_t pos_out,
size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+ssize_t blkdev_copy_offload_failfast(
+ struct block_device *bdev_in, loff_t pos_in,
+ struct block_device *bdev_out, loff_t pos_out,
+ size_t len, gfp_t gfp_mask);
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
gfp_t gfp_mask);
void bio_map_kern_endio(struct bio *bio);
--
2.35.1.500.gb896f729e2
> +/* Copy source offset from source block device to destination block
> + * device. Returns the length of bytes copied.
> + */
> +ssize_t blkdev_copy_offload_failfast(
> + struct block_device *bdev_in, loff_t pos_in,
> + struct block_device *bdev_out, loff_t pos_out,
> + size_t len, gfp_t gfp_mask)

This is an odd and very misnamed interface.

Either we have a blkdev_copy() interface that automatically falls back
to a fallback (maybe with an opt-out), or we have separate
blkdev_copy_offload/blkdev_copy_emulated interface and let the caller
decide. But none of that really is "failfast".

Also this needs to go into the helpers patch and not a patch that is
supposed to just wire copying up for block device node.

> index b07de77ef126..d27148a2543f 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> return -EOVERFLOW;
>
> /* Shorten the copy to EOF */
> - size_in = i_size_read(inode_in);
> + size_in = i_size_read(file_in->f_mapping->host);

generic_copy_file_checks needs to be fixed to use ->mapping->host both
for inode_in and inode_out at the top of the file instead of this
band aid. And that needs to be a separate patch with a Fixes tag.

> @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
> /* Don't copy dirs, pipes, sockets... */
> if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> return -EISDIR;
> - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> +
> + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) &&
> + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode)))

This is using weird indentation, and might also not be doing
exactly what we want. I think the better thing to do here is to:

 1) check for the acceptable types only on the in inode
 2) have a check that the mode matches for the in and out inodes

And please do this as a separate prep patch instead of hiding it here.
On 23/07/20 09:57AM, Christoph Hellwig wrote:
>> +/* Copy source offset from source block device to destination block
>> + * device. Returns the length of bytes copied.
>> + */
>> +ssize_t blkdev_copy_offload_failfast(
>> + struct block_device *bdev_in, loff_t pos_in,
>> + struct block_device *bdev_out, loff_t pos_out,
>> + size_t len, gfp_t gfp_mask)
>
>This is an odd and very misnamed interface.
>
>Either we have a klkdev_copy() interface that automatically falls back
>to a fallback (maybe with an opt-out), or we have separate
>blkdev_copy_offload/blkdev_copy_emulated interface and let the caller
>decide. But none of that really is "failfast".
>
>Also this needs to go into the helpers patch and not a patch that is
>supposed to just wire copying up for block device node.
>
Acked.
>> index b07de77ef126..d27148a2543f 100644
>> --- a/fs/read_write.c
>> +++ b/fs/read_write.c
>> @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
>> return -EOVERFLOW;
>>
>> /* Shorten the copy to EOF */
>> - size_in = i_size_read(inode_in);
>> + size_in = i_size_read(file_in->f_mapping->host);
>
>generic_copy_file_checks needs to be fixed to use ->mapping->host both
>or inode_in and inode_out at the top of the file instead of this
>band aid. And that needs to be a separate patch with a Fixes tag.
>
Addressed below.
>> @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
>> /* Don't copy dirs, pipes, sockets... */
>> if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
>> return -EISDIR;
>> - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
>> +
>> + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) &&
>> + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode)))
>
>This is using weird indentation, and might also not be doing
>exactly what we want. I think the better thing to do here is to:
>
> 1) check for the accetable types only on the in inode
> 2) have a check that the mode matches for the in and out inodes
>
>And please do this as a separate prep patch instead of hiding it here.
>
Agreed. We will send a separate patch that enables copy_file_range on
block devices.

Thank you,
Nitesh Shetty
On 6/28/23 03:36, Nitesh Shetty wrote:
> For direct block device opened with O_DIRECT, use copy_file_range to
> issue device copy offload, and fallback to generic_copy_file_range incase
> device copy offload capability is absent.
...if the device does not support copy offload or the device files are not open
with O_DIRECT.
No ?
> Modify checks to allow bdevs to use copy_file_range.
>
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
> block/blk-lib.c | 26 ++++++++++++++++++++++++++
> block/fops.c | 20 ++++++++++++++++++++
> fs/read_write.c | 7 +++++--
> include/linux/blkdev.h | 4 ++++
> 4 files changed, 55 insertions(+), 2 deletions(-)
>
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index 09e0d5d51d03..7d8e09a99254 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -473,6 +473,32 @@ ssize_t blkdev_copy_offload(
> }
> EXPORT_SYMBOL_GPL(blkdev_copy_offload);
>
> +/* Copy source offset from source block device to destination block
> + * device. Returns the length of bytes copied.
> + */
Multi-line comment style: start with a "/*" line please.
> +ssize_t blkdev_copy_offload_failfast(
What is the "failfast" in the name for ?
> + struct block_device *bdev_in, loff_t pos_in,
> + struct block_device *bdev_out, loff_t pos_out,
> + size_t len, gfp_t gfp_mask)
> +{
> + struct request_queue *in_q = bdev_get_queue(bdev_in);
> + struct request_queue *out_q = bdev_get_queue(bdev_out);
> + ssize_t ret = 0;
You do not need this initialization.
> +
> + if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len))
> + return 0;
> +
> + if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) {
Given that I think we do not allow copies between different devices, in_q and
out_q should always be the same, no ?
> + ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> + len, NULL, NULL, gfp_mask);
Same here. Why pass 2 bdevs if we only allow copies within the same device ?
> + if (ret < 0)
> + return 0;
> + }
> +
> + return ret;
return 0;
> +}
> +EXPORT_SYMBOL_GPL(blkdev_copy_offload_failfast);
> +
> static int __blkdev_issue_write_zeroes(struct block_device *bdev,
> sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
> struct bio **biop, unsigned flags)
> diff --git a/block/fops.c b/block/fops.c
> index a286bf3325c5..a1576304f269 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -621,6 +621,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> return ret;
> }
>
> +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> + struct file *file_out, loff_t pos_out,
> + size_t len, unsigned int flags)
> +{
> + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> + ssize_t comp_len = 0;
> +
> + if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> + (file_out->f_iocb_flags & IOCB_DIRECT))
> + comp_len = blkdev_copy_offload_failfast(in_bdev, pos_in,
> + out_bdev, pos_out, len, GFP_KERNEL);
> + if (comp_len != len)
> + comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> + file_out, pos_out + comp_len, len - comp_len, flags);
> +
> + return comp_len;
> +}
> +
> #define BLKDEV_FALLOC_FL_SUPPORTED \
> (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
> FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
> @@ -714,6 +733,7 @@ const struct file_operations def_blk_fops = {
> .splice_read = filemap_splice_read,
> .splice_write = iter_file_splice_write,
> .fallocate = blkdev_fallocate,
> + .copy_file_range = blkdev_copy_file_range,
> };
>
> static __init int blkdev_init(void)
> diff --git a/fs/read_write.c b/fs/read_write.c
> index b07de77ef126..d27148a2543f 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
> return -EOVERFLOW;
>
> /* Shorten the copy to EOF */
> - size_in = i_size_read(inode_in);
> + size_in = i_size_read(file_in->f_mapping->host);
> +
> if (pos_in >= size_in)
> count = 0;
> else
> @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
> /* Don't copy dirs, pipes, sockets... */
> if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> return -EISDIR;
> - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> +
> + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) &&
> + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode)))
> return -EINVAL;
>
> if (!(file_in->f_mode & FMODE_READ) ||
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index c176bf6173c5..850168cad080 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1047,6 +1047,10 @@ ssize_t blkdev_copy_offload(
> struct block_device *bdev_in, loff_t pos_in,
> struct block_device *bdev_out, loff_t pos_out,
> size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
> +ssize_t blkdev_copy_offload_failfast(
> + struct block_device *bdev_in, loff_t pos_in,
> + struct block_device *bdev_out, loff_t pos_out,
> + size_t len, gfp_t gfp_mask);
> struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
> gfp_t gfp_mask);
> void bio_map_kern_endio(struct bio *bio);
--
Damien Le Moal
Western Digital Research
On 23/06/28 03:51PM, Damien Le Moal wrote:
>On 6/28/23 03:36, Nitesh Shetty wrote:
>> For direct block device opened with O_DIRECT, use copy_file_range to
>> issue device copy offload, and fallback to generic_copy_file_range incase
>> device copy offload capability is absent.
>
>...if the device does not support copy offload or the device files are not open
>with O_DIRECT.
>
>No ?
>
Yes, you're right. We will fall back to generic_copy_file_range in either of
these cases.
>> Modify checks to allow bdevs to use copy_file_range.
>>
>> Suggested-by: Ming Lei <ming.lei@redhat.com>
>> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>> ---
>> block/blk-lib.c | 26 ++++++++++++++++++++++++++
>> block/fops.c | 20 ++++++++++++++++++++
>> fs/read_write.c | 7 +++++--
>> include/linux/blkdev.h | 4 ++++
>> 4 files changed, 55 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/blk-lib.c b/block/blk-lib.c
>> index 09e0d5d51d03..7d8e09a99254 100644
>> --- a/block/blk-lib.c
>> +++ b/block/blk-lib.c
>> @@ -473,6 +473,32 @@ ssize_t blkdev_copy_offload(
>> }
>> EXPORT_SYMBOL_GPL(blkdev_copy_offload);
>>
>> +/* Copy source offset from source block device to destination block
>> + * device. Returns the length of bytes copied.
>> + */
>
>Multi-line comment style: start with a "/*" line please.
>
acked
>> +ssize_t blkdev_copy_offload_failfast(
>
>What is the "failfast" in the name for ?
We don't want failed copy offload I/Os to fall back to block layer copy emulation.
We wanted an API that returns an error if the offload fails.
>
>> + struct block_device *bdev_in, loff_t pos_in,
>> + struct block_device *bdev_out, loff_t pos_out,
>> + size_t len, gfp_t gfp_mask)
>> +{
>> + struct request_queue *in_q = bdev_get_queue(bdev_in);
>> + struct request_queue *out_q = bdev_get_queue(bdev_out);
>> + ssize_t ret = 0;
>
>You do not need this initialization.
>
We need this initialization, because __blkdev_copy_offload returns the number of
bytes copied or an error value.
So we cannot return 0 in case of success/partial completion.
blkdev_copy_offload_failfast is expected to return the number of bytes copied.
>> +
>> + if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len))
>> + return 0;
>> +
>> + if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) {
>
>Given that I think we do not allow copies between different devices, in_q and
>out_q should always be the same, no ?
acked, will update this.
>
>> + ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
>> + len, NULL, NULL, gfp_mask);
>
>Same here. Why pass 2 bdevs if we only allow copies within the same device ?
>
acked, will update function arguments to take single bdev.
>> + if (ret < 0)
>> + return 0;
>> + }
>> +
>> + return ret;
>
>return 0;
>
Nack, explained above.
Thank you,
Nitesh Shetty
© 2016 - 2026 Red Hat, Inc.