[PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback

wangtao posted 4 patches 6 months, 2 weeks ago
[PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback
Posted by wangtao 6 months, 2 weeks ago
Construct a bio_vec array from the udmabuf's pinned folios, then call the
other file's read_iter/write_iter callbacks to perform the I/O.
Test data shows that direct I/O copy_file_range improves throughput by
over 50% compared with direct I/O mmap & read (2557 MB/s vs 1534 MB/s).

Test data:
|    32x32MB Read 1024MB  |Creat-ms|Close-ms|  I/O-ms|I/O-MB/s| I/O%
|-------------------------|--------|--------|--------|--------|-----
| 1)Beg udmabuf buffer R/W|    580 |    323 |   1238 |    867 | 100%
| 2)     dmabuf buffer R/W|     48 |      5 |   1149 |    934 | 107%
| 3) udma+memfd buffer R/W|    597 |    340 |   2157 |    497 |  57%
| 4) udma+memfd direct R/W|    573 |    340 |    700 |   1534 | 176%
| 5) u+mfd buffer sendfile|    577 |    340 |   1204 |    891 | 102%
| 6) u+mfd direct sendfile|    567 |    339 |   2272 |    472 |  54%
| 7)   u+mfd buffer splice|    570 |    337 |   1114 |    964 | 111%
| 8)   u+mfd direct splice|    564 |    335 |    793 |   1355 | 156%
| 9)  udmabuf buffer c_f_r|    577 |    323 |   1059 |   1014 | 116%
|10)  udmabuf direct c_f_r|    582 |    325 |    420 |   2557 | 294%
|11)End udmabuf buffer R/W|    586 |    323 |   1188 |    903 | 104%

Signed-off-by: wangtao <tao.wangtao@honor.com>
---
 drivers/dma-buf/udmabuf.c | 59 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index e74e36a8ecda..573275a51674 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -284,6 +284,64 @@ static int end_cpu_udmabuf(struct dma_buf *buf,
 	return 0;
 }
 
+static ssize_t udmabuf_rw_file(struct dma_buf *dmabuf, loff_t my_pos,
+			struct file *other, loff_t pos,
+			size_t count, bool is_write)
+{
+	struct udmabuf *ubuf = dmabuf->priv;
+	loff_t my_end = my_pos + count, bv_beg, bv_end = 0;
+	pgoff_t pg_idx = my_pos / PAGE_SIZE;
+	pgoff_t pg_end = DIV_ROUND_UP(my_end, PAGE_SIZE);
+	size_t i, bv_off, bv_len, bv_num, bv_idx = 0, bv_total = 0;
+	struct bio_vec *bvec;
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
+	ssize_t ret = 0, rw_total = 0;
+	struct folio *folio;
+
+	bv_num = min_t(size_t, pg_end - pg_idx + 1, 1024);
+	bvec = kvcalloc(bv_num, sizeof(*bvec), GFP_KERNEL);
+	if (!bvec)
+		return -ENOMEM;
+
+	init_sync_kiocb(&kiocb, other);
+	kiocb.ki_pos = pos;
+
+	for (i = 0; i < ubuf->nr_pinned && my_pos < my_end; i++) {
+		folio = ubuf->pinned_folios[i];
+		bv_beg = bv_end;
+		bv_end += folio_size(folio);
+		if (bv_end <= my_pos)
+			continue;
+
+		bv_len = min(bv_end, my_end) - my_pos;
+		bv_off = my_pos - bv_beg;
+		my_pos += bv_len;
+		bv_total += bv_len;
+		bvec_set_page(&bvec[bv_idx], &folio->page, bv_len, bv_off);
+		if (++bv_idx < bv_num && my_pos < my_end)
+			continue;
+
+		/* start R/W if bvec is full or count reaches zero. */
+		iov_iter_bvec(&iter, direction, bvec, bv_idx, bv_total);
+		if (is_write)
+			ret = other->f_op->write_iter(&kiocb, &iter);
+		else
+			ret = other->f_op->read_iter(&kiocb, &iter);
+		if (ret <= 0)
+			break;
+		rw_total += ret;
+		if (ret < bv_total || fatal_signal_pending(current))
+			break;
+
+		bv_idx = bv_total = 0;
+	}
+	kvfree(bvec);
+
+	return rw_total > 0 ? rw_total : ret;
+}
+
 static const struct dma_buf_ops udmabuf_ops = {
 	.cache_sgt_mapping = true,
 	.map_dma_buf	   = map_udmabuf,
@@ -294,6 +352,7 @@ static const struct dma_buf_ops udmabuf_ops = {
 	.vunmap		   = vunmap_udmabuf,
 	.begin_cpu_access  = begin_cpu_udmabuf,
 	.end_cpu_access    = end_cpu_udmabuf,
+	.rw_file = udmabuf_rw_file,
 };
 
 #define SEALS_WANTED (F_SEAL_SHRINK)
-- 
2.17.1
Re: [PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback
Posted by kernel test robot 6 months, 2 weeks ago
Hi wangtao,

kernel test robot noticed the following build errors:

[auto build test ERROR on brauner-vfs/vfs.all]
[also build test ERROR on next-20250530]
[cannot apply to linus/master v6.15]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/wangtao/fs-allow-cross-FS-copy_file_range-for-memory-backed-files/20250530-184146
base:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all
patch link:    https://lore.kernel.org/r/20250530103941.11092-4-tao.wangtao%40honor.com
patch subject: [PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback
config: sparc64-randconfig-002-20250530 (https://download.01.org/0day-ci/archive/20250530/202505302235.mDzENMSm-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 15.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250530/202505302235.mDzENMSm-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505302235.mDzENMSm-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   drivers/dma-buf/udmabuf.c: In function 'udmabuf_rw_file':
>> drivers/dma-buf/udmabuf.c:298:25: error: storage size of 'iter' isn't known
     298 |         struct iov_iter iter;
         |                         ^~~~
>> drivers/dma-buf/udmabuf.c:299:45: error: 'ITER_SOURCE' undeclared (first use in this function)
     299 |         unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
         |                                             ^~~~~~~~~~~
   drivers/dma-buf/udmabuf.c:299:45: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/dma-buf/udmabuf.c:299:59: error: 'ITER_DEST' undeclared (first use in this function)
     299 |         unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
         |                                                           ^~~~~~~~~
>> drivers/dma-buf/udmabuf.c:327:17: error: implicit declaration of function 'iov_iter_bvec'; did you mean 'bvec_iter_bvec'? [-Wimplicit-function-declaration]
     327 |                 iov_iter_bvec(&iter, direction, bvec, bv_idx, bv_total);
         |                 ^~~~~~~~~~~~~
         |                 bvec_iter_bvec
>> drivers/dma-buf/udmabuf.c:298:25: warning: unused variable 'iter' [-Wunused-variable]
     298 |         struct iov_iter iter;
         |                         ^~~~


vim +298 drivers/dma-buf/udmabuf.c

   286	
   287	static ssize_t udmabuf_rw_file(struct dma_buf *dmabuf, loff_t my_pos,
   288				struct file *other, loff_t pos,
   289				size_t count, bool is_write)
   290	{
   291		struct udmabuf *ubuf = dmabuf->priv;
   292		loff_t my_end = my_pos + count, bv_beg, bv_end = 0;
   293		pgoff_t pg_idx = my_pos / PAGE_SIZE;
   294		pgoff_t pg_end = DIV_ROUND_UP(my_end, PAGE_SIZE);
   295		size_t i, bv_off, bv_len, bv_num, bv_idx = 0, bv_total = 0;
   296		struct bio_vec *bvec;
   297		struct kiocb kiocb;
 > 298		struct iov_iter iter;
 > 299		unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
   300		ssize_t ret = 0, rw_total = 0;
   301		struct folio *folio;
   302	
   303		bv_num = min_t(size_t, pg_end - pg_idx + 1, 1024);
   304		bvec = kvcalloc(bv_num, sizeof(*bvec), GFP_KERNEL);
   305		if (!bvec)
   306			return -ENOMEM;
   307	
   308		init_sync_kiocb(&kiocb, other);
   309		kiocb.ki_pos = pos;
   310	
   311		for (i = 0; i < ubuf->nr_pinned && my_pos < my_end; i++) {
   312			folio = ubuf->pinned_folios[i];
   313			bv_beg = bv_end;
   314			bv_end += folio_size(folio);
   315			if (bv_end <= my_pos)
   316				continue;
   317	
   318			bv_len = min(bv_end, my_end) - my_pos;
   319			bv_off = my_pos - bv_beg;
   320			my_pos += bv_len;
   321			bv_total += bv_len;
   322			bvec_set_page(&bvec[bv_idx], &folio->page, bv_len, bv_off);
   323			if (++bv_idx < bv_num && my_pos < my_end)
   324				continue;
   325	
   326			/* start R/W if bvec is full or count reaches zero. */
 > 327			iov_iter_bvec(&iter, direction, bvec, bv_idx, bv_total);
   328			if (is_write)
   329				ret = other->f_op->write_iter(&kiocb, &iter);
   330			else
   331				ret = other->f_op->read_iter(&kiocb, &iter);
   332			if (ret <= 0)
   333				break;
   334			rw_total += ret;
   335			if (ret < bv_total || fatal_signal_pending(current))
   336				break;
   337	
   338			bv_idx = bv_total = 0;
   339		}
   340		kvfree(bvec);
   341	
   342		return rw_total > 0 ? rw_total : ret;
   343	}
   344	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
RE: [PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback
Posted by wangtao 6 months, 2 weeks ago

> -----Original Message-----
> From: kernel test robot <lkp@intel.com>
> Sent: Friday, May 30, 2025 10:25 PM
> To: wangtao <tao.wangtao@honor.com>; sumit.semwal@linaro.org;
> christian.koenig@amd.com; kraxel@redhat.com; vivek.kasireddy@intel.com;
> viro@zeniv.linux.org.uk; brauner@kernel.org; hughd@google.com;
> akpm@linux-foundation.org; amir73il@gmail.com
> Cc: oe-kbuild-all@lists.linux.dev; benjamin.gaignard@collabora.com;
> Brian.Starkey@arm.com; jstultz@google.com; tjmercier@google.com;
> jack@suse.cz; baolin.wang@linux.alibaba.com; linux-media@vger.kernel.org;
> dri-devel@lists.freedesktop.org; linaro-mm-sig@lists.linaro.org; linux-
> kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org; linux-
> mm@kvack.org; wangbintian(BintianWang) <bintian.wang@honor.com>;
> yipengxiang <yipengxiang@honor.com>; liulu 00013167
> <liulu.liu@honor.com>; hanfeng 00012985 <feng.han@honor.com>; wangtao
> <tao.wangtao@honor.com>
> Subject: Re: [PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback
> 
> Hi wangtao,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on brauner-vfs/vfs.all] [also build test ERROR on next-
> 20250530] [cannot apply to linus/master v6.15] [If your patch is applied to the
> wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/wangtao/fs-allow-
> cross-FS-copy_file_range-for-memory-backed-files/20250530-184146
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all
> patch link:    https://lore.kernel.org/r/20250530103941.11092-4-
> tao.wangtao%40honor.com
> patch subject: [PATCH v3 3/4] udmabuf: Implement udmabuf rw_file callback
> config: sparc64-randconfig-002-20250530 (https://download.01.org/0day-
> ci/archive/20250530/202505302235.mDzENMSm-lkp@intel.com/config)
> compiler: sparc64-linux-gcc (GCC) 15.1.0 reproduce (this is a W=1 build):
> (https://download.01.org/0day-
> ci/archive/20250530/202505302235.mDzENMSm-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes:
> | https://lore.kernel.org/oe-kbuild-all/202505302235.mDzENMSm-lkp@intel.
> | com/
> 
> All error/warnings (new ones prefixed by >>):
> 
Quick note: I don't have a local sparc64 compilation setup, so I'll
explicitly add the header dependencies to be safe.

Regards,
Wangtao.
>    drivers/dma-buf/udmabuf.c: In function 'udmabuf_rw_file':
> >> drivers/dma-buf/udmabuf.c:298:25: error: storage size of 'iter' isn't
> >> known
>      298 |         struct iov_iter iter;
>          |                         ^~~~
> >> drivers/dma-buf/udmabuf.c:299:45: error: 'ITER_SOURCE' undeclared
> >> (first use in this function)
>      299 |         unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
>          |                                             ^~~~~~~~~~~
>    drivers/dma-buf/udmabuf.c:299:45: note: each undeclared identifier is
> reported only once for each function it appears in
> >> drivers/dma-buf/udmabuf.c:299:59: error: 'ITER_DEST' undeclared
> >> (first use in this function)
>      299 |         unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
>          |                                                           ^~~~~~~~~
> >> drivers/dma-buf/udmabuf.c:327:17: error: implicit declaration of
> >> function 'iov_iter_bvec'; did you mean 'bvec_iter_bvec'?
> >> [-Wimplicit-function-declaration]
>      327 |                 iov_iter_bvec(&iter, direction, bvec, bv_idx, bv_total);
>          |                 ^~~~~~~~~~~~~
>          |                 bvec_iter_bvec
> >> drivers/dma-buf/udmabuf.c:298:25: warning: unused variable 'iter'
> >> [-Wunused-variable]
>      298 |         struct iov_iter iter;
>          |                         ^~~~
> 
> 
> vim +298 drivers/dma-buf/udmabuf.c
> 
>    286
>    287	static ssize_t udmabuf_rw_file(struct dma_buf *dmabuf, loff_t
> my_pos,
>    288				struct file *other, loff_t pos,
>    289				size_t count, bool is_write)
>    290	{
>    291		struct udmabuf *ubuf = dmabuf->priv;
>    292		loff_t my_end = my_pos + count, bv_beg, bv_end = 0;
>    293		pgoff_t pg_idx = my_pos / PAGE_SIZE;
>    294		pgoff_t pg_end = DIV_ROUND_UP(my_end, PAGE_SIZE);
>    295		size_t i, bv_off, bv_len, bv_num, bv_idx = 0, bv_total = 0;
>    296		struct bio_vec *bvec;
>    297		struct kiocb kiocb;
>  > 298		struct iov_iter iter;
>  > 299		unsigned int direction = is_write ? ITER_SOURCE : ITER_DEST;
>    300		ssize_t ret = 0, rw_total = 0;
>    301		struct folio *folio;
>    302
>    303		bv_num = min_t(size_t, pg_end - pg_idx + 1, 1024);
>    304		bvec = kvcalloc(bv_num, sizeof(*bvec), GFP_KERNEL);
>    305		if (!bvec)
>    306			return -ENOMEM;
>    307
>    308		init_sync_kiocb(&kiocb, other);
>    309		kiocb.ki_pos = pos;
>    310
>    311		for (i = 0; i < ubuf->nr_pinned && my_pos < my_end; i++) {
>    312			folio = ubuf->pinned_folios[i];
>    313			bv_beg = bv_end;
>    314			bv_end += folio_size(folio);
>    315			if (bv_end <= my_pos)
>    316				continue;
>    317
>    318			bv_len = min(bv_end, my_end) - my_pos;
>    319			bv_off = my_pos - bv_beg;
>    320			my_pos += bv_len;
>    321			bv_total += bv_len;
>    322			bvec_set_page(&bvec[bv_idx], &folio->page, bv_len,
> bv_off);
>    323			if (++bv_idx < bv_num && my_pos < my_end)
>    324				continue;
>    325
>    326			/* start R/W if bvec is full or count reaches zero. */
>  > 327			iov_iter_bvec(&iter, direction, bvec, bv_idx,
> bv_total);
>    328			if (is_write)
>    329				ret = other->f_op->write_iter(&kiocb, &iter);
>    330			else
>    331				ret = other->f_op->read_iter(&kiocb, &iter);
>    332			if (ret <= 0)
>    333				break;
>    334			rw_total += ret;
>    335			if (ret < bv_total || fatal_signal_pending(current))
>    336				break;
>    337
>    338			bv_idx = bv_total = 0;
>    339		}
>    340		kvfree(bvec);
>    341
>    342		return rw_total > 0 ? rw_total : ret;
>    343	}
>    344
> 
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki