From: Nitesh Shetty <nj.shetty@samsung.com>
For direct block device opened with O_DIRECT, use copy_file_range to
issue device copy offload, and fall back to generic_copy_file_range in case
device copy offload capability is absent.
Modify checks to allow bdevs to use copy_file_range.
Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
block/blk-lib.c | 22 ++++++++++++++++++++++
block/fops.c | 20 ++++++++++++++++++++
fs/read_write.c | 11 +++++++++--
include/linux/blkdev.h | 3 +++
4 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index a21819e59b29..c288573c7e77 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
return blk_queue_copy(q_in) && blk_queue_copy(q_out);
}
+int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
+ struct block_device *bdev_out, loff_t pos_out, size_t len,
+ cio_iodone_t end_io, void *private, gfp_t gfp_mask)
+{
+ struct request_queue *in_q = bdev_get_queue(bdev_in);
+ struct request_queue *out_q = bdev_get_queue(bdev_out);
+ int ret = -EINVAL;
+ bool offload = false;
+
+ ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
+ if (ret)
+ return ret;
+
+ offload = blk_check_copy_offload(in_q, out_q);
+ if (offload)
+ ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
+ len, end_io, private, gfp_mask);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_copy_offload);
+
/*
* @bdev_in: source block device
* @pos_in: source offset
diff --git a/block/fops.c b/block/fops.c
index d2e6be4e3d1c..3b7c05831d5c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
+static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
+ struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
+ int comp_len = 0;
+
+ if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
+ (file_out->f_iocb_flags & IOCB_DIRECT))
+ comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
+ pos_out, len, NULL, NULL, GFP_KERNEL);
+ if (comp_len != len)
+ comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
+ file_out, pos_out + comp_len, len - comp_len, flags);
+
+ return comp_len;
+}
+
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
@@ -694,6 +713,7 @@ const struct file_operations def_blk_fops = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
+ .copy_file_range = blkdev_copy_file_range,
};
static __init int blkdev_init(void)
diff --git a/fs/read_write.c b/fs/read_write.c
index 7a2ff6157eda..62e925e9b2f0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -20,6 +20,7 @@
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/fs.h>
+#include <linux/blkdev.h>
#include "internal.h"
#include <linux/uaccess.h>
@@ -1448,7 +1449,11 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
return -EOVERFLOW;
/* Shorten the copy to EOF */
- size_in = i_size_read(inode_in);
+ if (S_ISBLK(inode_in->i_mode))
+ size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host));
+ else
+ size_in = i_size_read(inode_in);
+
if (pos_in >= size_in)
count = 0;
else
@@ -1709,7 +1714,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
/* Don't copy dirs, pipes, sockets... */
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+
+ if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) &&
+ (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode)))
return -EINVAL;
if (!(file_in->f_mode & FMODE_READ) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a54153610800..468d5f3378e2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1057,6 +1057,9 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
int blkdev_issue_copy(struct block_device *bdev_in, loff_t pos_in,
struct block_device *bdev_out, loff_t pos_out, size_t len,
cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
+ struct block_device *bdev_out, loff_t pos_out, size_t len,
+ cio_iodone_t end_io, void *private, gfp_t gfp_mask);
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
gfp_t gfp_mask);
void bio_map_kern_endio(struct bio *bio);
--
2.35.1.500.gb896f729e2
Hi Anuj,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on device-mapper-dm/for-next linus/master v6.3-rc4 next-20230329]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20230327084103.21601-5-anuj20.g%40samsung.com
patch subject: [PATCH v8 4/9] fs, block: copy_file_range for def_blk_ops for direct block device.
config: x86_64-randconfig-a013 (https://download.01.org/0day-ci/archive/20230329/202303292349.ED70Fxdw-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/61819d260936954ddd6688548f074e7063dcf39e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
git checkout 61819d260936954ddd6688548f074e7063dcf39e
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 olddefconfig
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303292349.ED70Fxdw-lkp@intel.com/
All errors (new ones prefixed by >>):
ld: vmlinux.o: in function `generic_copy_file_checks':
>> fs/read_write.c:1453: undefined reference to `I_BDEV'
vim +1453 fs/read_write.c
1398
1399 /*
1400 * Performs necessary checks before doing a file copy
1401 *
1402 * Can adjust amount of bytes to copy via @req_count argument.
1403 * Returns appropriate error code that caller should return or
1404 * zero in case the copy should be allowed.
1405 */
1406 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
1407 struct file *file_out, loff_t pos_out,
1408 size_t *req_count, unsigned int flags)
1409 {
1410 struct inode *inode_in = file_inode(file_in);
1411 struct inode *inode_out = file_inode(file_out);
1412 uint64_t count = *req_count;
1413 loff_t size_in;
1414 int ret;
1415
1416 ret = generic_file_rw_checks(file_in, file_out);
1417 if (ret)
1418 return ret;
1419
1420 /*
1421 * We allow some filesystems to handle cross sb copy, but passing
1422 * a file of the wrong filesystem type to filesystem driver can result
1423 * in an attempt to dereference the wrong type of ->private_data, so
1424 * avoid doing that until we really have a good reason.
1425 *
1426 * nfs and cifs define several different file_system_type structures
1427 * and several different sets of file_operations, but they all end up
1428 * using the same ->copy_file_range() function pointer.
1429 */
1430 if (flags & COPY_FILE_SPLICE) {
1431 /* cross sb splice is allowed */
1432 } else if (file_out->f_op->copy_file_range) {
1433 if (file_in->f_op->copy_file_range !=
1434 file_out->f_op->copy_file_range)
1435 return -EXDEV;
1436 } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
1437 return -EXDEV;
1438 }
1439
1440 /* Don't touch certain kinds of inodes */
1441 if (IS_IMMUTABLE(inode_out))
1442 return -EPERM;
1443
1444 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1445 return -ETXTBSY;
1446
1447 /* Ensure offsets don't wrap. */
1448 if (pos_in + count < pos_in || pos_out + count < pos_out)
1449 return -EOVERFLOW;
1450
1451 /* Shorten the copy to EOF */
1452 if (S_ISBLK(inode_in->i_mode))
> 1453 size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host));
1454 else
1455 size_in = i_size_read(inode_in);
1456
1457 if (pos_in >= size_in)
1458 count = 0;
1459 else
1460 count = min(count, size_in - (uint64_t)pos_in);
1461
1462 ret = generic_write_check_limits(file_out, pos_out, &count);
1463 if (ret)
1464 return ret;
1465
1466 /* Don't allow overlapped copying within the same file. */
1467 if (inode_in == inode_out &&
1468 pos_out + count > pos_in &&
1469 pos_out < pos_in + count)
1470 return -EINVAL;
1471
1472 *req_count = count;
1473 return 0;
1474 }
1475
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
Hi Anuj,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on device-mapper-dm/for-next linus/master v6.3-rc4 next-20230329]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20230327084103.21601-5-anuj20.g%40samsung.com
patch subject: [PATCH v8 4/9] fs, block: copy_file_range for def_blk_ops for direct block device.
config: loongarch-randconfig-r001-20230329 (https://download.01.org/0day-ci/archive/20230329/202303292151.7DDOUCIt-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/61819d260936954ddd6688548f074e7063dcf39e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
git checkout 61819d260936954ddd6688548f074e7063dcf39e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303292151.7DDOUCIt-lkp@intel.com/
All errors (new ones prefixed by >>):
loongarch64-linux-ld: fs/read_write.o: in function `.L633':
>> read_write.c:(.text+0x42e0): undefined reference to `I_BDEV'
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
>
> For direct block device opened with O_DIRECT, use copy_file_range to
> issue device copy offload, and fallback to generic_copy_file_range incase
> device copy offload capability is absent.
> Modify checks to allow bdevs to use copy_file_range.
>
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
> block/blk-lib.c | 22 ++++++++++++++++++++++
> block/fops.c | 20 ++++++++++++++++++++
> fs/read_write.c | 11 +++++++++--
> include/linux/blkdev.h | 3 +++
> 4 files changed, 54 insertions(+), 2 deletions(-)
>
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index a21819e59b29..c288573c7e77 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> }
>
> +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> + struct block_device *bdev_out, loff_t pos_out, size_t len,
> + cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> +{
> + struct request_queue *in_q = bdev_get_queue(bdev_in);
> + struct request_queue *out_q = bdev_get_queue(bdev_out);
> + int ret = -EINVAL;
Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
right away anyway?
> + bool offload = false;
Same thing with initializing offload.
> +
> + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> + if (ret)
> + return ret;
> +
> + offload = blk_check_copy_offload(in_q, out_q);
> + if (offload)
> + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> + len, end_io, private, gfp_mask);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> +
> /*
> * @bdev_in: source block device
> * @pos_in: source offset
> diff --git a/block/fops.c b/block/fops.c
> index d2e6be4e3d1c..3b7c05831d5c 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> return ret;
> }
>
> +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> + struct file *file_out, loff_t pos_out,
> + size_t len, unsigned int flags)
> +{
> + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> + int comp_len = 0;
> +
> + if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> + (file_out->f_iocb_flags & IOCB_DIRECT))
> + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> + pos_out, len, NULL, NULL, GFP_KERNEL);
> + if (comp_len != len)
> + comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> + file_out, pos_out + comp_len, len - comp_len, flags);
I'm not deeply familiar with this code but this looks odd. It at least
seems possible that comp_len could be -EINVAL and len 20 at which point
you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote:
> On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> > From: Nitesh Shetty <nj.shetty@samsung.com>
> >
> > For direct block device opened with O_DIRECT, use copy_file_range to
> > issue device copy offload, and fallback to generic_copy_file_range incase
> > device copy offload capability is absent.
> > Modify checks to allow bdevs to use copy_file_range.
> >
> > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > ---
> > block/blk-lib.c | 22 ++++++++++++++++++++++
> > block/fops.c | 20 ++++++++++++++++++++
> > fs/read_write.c | 11 +++++++++--
> > include/linux/blkdev.h | 3 +++
> > 4 files changed, 54 insertions(+), 2 deletions(-)
> >
> > diff --git a/block/blk-lib.c b/block/blk-lib.c
> > index a21819e59b29..c288573c7e77 100644
> > --- a/block/blk-lib.c
> > +++ b/block/blk-lib.c
> > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> > return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> > }
> >
> > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> > + struct block_device *bdev_out, loff_t pos_out, size_t len,
> > + cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> > +{
> > + struct request_queue *in_q = bdev_get_queue(bdev_in);
> > + struct request_queue *out_q = bdev_get_queue(bdev_out);
> > + int ret = -EINVAL;
>
> Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
> right away anyway?
>
acked.
> > + bool offload = false;
>
> Same thing with initializing offload.
>
acked
> > +
> > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> > + if (ret)
> > + return ret;
> > +
> > + offload = blk_check_copy_offload(in_q, out_q);
> > + if (offload)
> > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> > + len, end_io, private, gfp_mask);
> > +
> > + return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> > +
> > /*
> > * @bdev_in: source block device
> > * @pos_in: source offset
> > diff --git a/block/fops.c b/block/fops.c
> > index d2e6be4e3d1c..3b7c05831d5c 100644
> > --- a/block/fops.c
> > +++ b/block/fops.c
> > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > return ret;
> > }
> >
> > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> > + struct file *file_out, loff_t pos_out,
> > + size_t len, unsigned int flags)
> > +{
> > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> > + int comp_len = 0;
> > +
> > + if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> > + (file_out->f_iocb_flags & IOCB_DIRECT))
> > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> > + pos_out, len, NULL, NULL, GFP_KERNEL);
> > + if (comp_len != len)
> > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> > + file_out, pos_out + comp_len, len - comp_len, flags);
>
> I'm not deeply familiar with this code but this looks odd. It at least
> seems possible that comp_len could be -EINVAL and len 20 at which point
> you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
comp_len should be 0 in case of error. We do agree, some function
description needs to be updated. We will recheck this completion path to
make sure not to return a negative value in case of failure.
Thank You,
Nitesh Shetty
On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote:
> On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote:
> > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> > > From: Nitesh Shetty <nj.shetty@samsung.com>
> > >
> > > For direct block device opened with O_DIRECT, use copy_file_range to
> > > issue device copy offload, and fallback to generic_copy_file_range incase
> > > device copy offload capability is absent.
> > > Modify checks to allow bdevs to use copy_file_range.
> > >
> > > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > > ---
> > > block/blk-lib.c | 22 ++++++++++++++++++++++
> > > block/fops.c | 20 ++++++++++++++++++++
> > > fs/read_write.c | 11 +++++++++--
> > > include/linux/blkdev.h | 3 +++
> > > 4 files changed, 54 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/block/blk-lib.c b/block/blk-lib.c
> > > index a21819e59b29..c288573c7e77 100644
> > > --- a/block/blk-lib.c
> > > +++ b/block/blk-lib.c
> > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> > > return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> > > }
> > >
> > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> > > + struct block_device *bdev_out, loff_t pos_out, size_t len,
> > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> > > +{
> > > + struct request_queue *in_q = bdev_get_queue(bdev_in);
> > > + struct request_queue *out_q = bdev_get_queue(bdev_out);
> > > + int ret = -EINVAL;
> >
> > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
> > right away anyway?
> >
>
> acked.
>
> > > + bool offload = false;
> >
> > Same thing with initializing offload.
> >
> acked
>
> > > +
> > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + offload = blk_check_copy_offload(in_q, out_q);
> > > + if (offload)
> > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> > > + len, end_io, private, gfp_mask);
> > > +
> > > + return ret;
> > > +}
> > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> > > +
> > > /*
> > > * @bdev_in: source block device
> > > * @pos_in: source offset
> > > diff --git a/block/fops.c b/block/fops.c
> > > index d2e6be4e3d1c..3b7c05831d5c 100644
> > > --- a/block/fops.c
> > > +++ b/block/fops.c
> > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > > return ret;
> > > }
> > >
> > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> > > + struct file *file_out, loff_t pos_out,
> > > + size_t len, unsigned int flags)
> > > +{
> > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> > > + int comp_len = 0;
> > > +
> > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> > > + (file_out->f_iocb_flags & IOCB_DIRECT))
> > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> > > + pos_out, len, NULL, NULL, GFP_KERNEL);
> > > + if (comp_len != len)
> > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> > > + file_out, pos_out + comp_len, len - comp_len, flags);
> >
> > I'm not deeply familiar with this code but this looks odd. It at least
> > seems possible that comp_len could be -EINVAL and len 20 at which point
> > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
20 - -22 = 42 ofc
>
> comp_len should be 0 incase of error. We do agree, some function
I mean, not to hammer on this point too much but just to be clear
blk_copy_sanity_check(), which is introduced in the second patch, can
return both -EPERM and -EINVAL and is first called in
blkdev_copy_offload() so it's definitely possible for comp_len to be
negative.
On Thu, Mar 30, 2023 at 11:18 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote:
> > On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote:
> > > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> > > > From: Nitesh Shetty <nj.shetty@samsung.com>
> > > >
> > > > For direct block device opened with O_DIRECT, use copy_file_range to
> > > > issue device copy offload, and fallback to generic_copy_file_range incase
> > > > device copy offload capability is absent.
> > > > Modify checks to allow bdevs to use copy_file_range.
> > > >
> > > > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > > > ---
> > > > block/blk-lib.c | 22 ++++++++++++++++++++++
> > > > block/fops.c | 20 ++++++++++++++++++++
> > > > fs/read_write.c | 11 +++++++++--
> > > > include/linux/blkdev.h | 3 +++
> > > > 4 files changed, 54 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/block/blk-lib.c b/block/blk-lib.c
> > > > index a21819e59b29..c288573c7e77 100644
> > > > --- a/block/blk-lib.c
> > > > +++ b/block/blk-lib.c
> > > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> > > > return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> > > > }
> > > >
> > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> > > > + struct block_device *bdev_out, loff_t pos_out, size_t len,
> > > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> > > > +{
> > > > + struct request_queue *in_q = bdev_get_queue(bdev_in);
> > > > + struct request_queue *out_q = bdev_get_queue(bdev_out);
> > > > + int ret = -EINVAL;
> > >
> > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
> > > right away anyway?
> > >
> >
> > acked.
> >
> > > > + bool offload = false;
> > >
> > > Same thing with initializing offload.
> > >
> > acked
> >
> > > > +
> > > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> > > > + if (ret)
> > > > + return ret;
> > > > +
> > > > + offload = blk_check_copy_offload(in_q, out_q);
> > > > + if (offload)
> > > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> > > > + len, end_io, private, gfp_mask);
> > > > +
> > > > + return ret;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> > > > +
> > > > /*
> > > > * @bdev_in: source block device
> > > > * @pos_in: source offset
> > > > diff --git a/block/fops.c b/block/fops.c
> > > > index d2e6be4e3d1c..3b7c05831d5c 100644
> > > > --- a/block/fops.c
> > > > +++ b/block/fops.c
> > > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > > > return ret;
> > > > }
> > > >
> > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> > > > + struct file *file_out, loff_t pos_out,
> > > > + size_t len, unsigned int flags)
> > > > +{
> > > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> > > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> > > > + int comp_len = 0;
> > > > +
> > > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> > > > + (file_out->f_iocb_flags & IOCB_DIRECT))
> > > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> > > > + pos_out, len, NULL, NULL, GFP_KERNEL);
> > > > + if (comp_len != len)
> > > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> > > > + file_out, pos_out + comp_len, len - comp_len, flags);
> > >
> > > I'm not deeply familiar with this code but this looks odd. It at least
> > > seems possible that comp_len could be -EINVAL and len 20 at which point
> > > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
>
> 20 - -22 = 42 ofc
>
> >
> > comp_len should be 0 incase of error. We do agree, some function
>
> I mean, not to hammer on this point too much but just to be clear
> blk_copy_sanity_check(), which is introduced in the second patch, can
> return both -EPERM and -EINVAL and is first called in
> blkdev_copy_offload() so it's definitely possible for comp_len to be
> negative.
Acked. Will be updated in the next version.
Thank you,
Nitesh Shetty
© 2016 - 2026 Red Hat, Inc.