From nobody Mon Feb 9 01:23:45 2026 Received: from mta20.hihonor.com (mta20.honor.com [81.70.206.69]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 949EA2820BF; Tue, 3 Jun 2025 09:53:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=81.70.206.69 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944441; cv=none; b=ebukuInO0clMdH16FtJ+2z6GWp3x6Q5qZ1DHT42KiVZ50LxCie/kX02A67kwxLOXvZCAvGjznt+Di6Tg10fPIi20YkIoSfZ61ZuukTx9GVzL35CRDWGmMsOGL4WK1Gb9apF3jjhr/6eVDvDkIDCuwWkUR3DTrLQ08x19wV/yIdE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944441; c=relaxed/simple; bh=yUi7+p00jMqRela2NQwhtGNLkrlaHVdgOe80QIGCYOU=; h=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=JYEm//TFq9LpzWXwTdR2e9pq/gLV66i8mnHMVPS3ne5x2LB5gLgoXJO5sJNWvsrj4GZH86Z/NaobWSZg+yOGCKgSgJRXRo9GuhY0VbR0Osdg5KSsAE+4ViRAtmTeqxF+uyjpgzY+NMXTKIJa9OB2jc2TuEfdSMHUNIpJmSg2VCI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com; spf=pass smtp.mailfrom=honor.com; arc=none smtp.client-ip=81.70.206.69 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=honor.com Received: from w002.hihonor.com (unknown [10.68.28.120]) by mta20.hihonor.com (SkyGuard) with ESMTPS id 4bBQw53G5YzYlP5Y; Tue, 3 Jun 2025 17:51:33 +0800 (CST) Received: from a010.hihonor.com (10.68.16.52) by w002.hihonor.com (10.68.28.120) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 Received: from localhost.localdomain (10.144.18.117) by a010.hihonor.com (10.68.16.52) with Microsoft SMTP Server (version=TLS1_2, 
cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 From: wangtao To: , , , , , , , , CC: , , , , , , , , , , , , , , , , wangtao Subject: [PATCH v4 1/4] fs: allow cross-FS copy_file_range for memory file with direct I/O Date: Tue, 3 Jun 2025 17:52:42 +0800 Message-ID: <20250603095245.17478-2-tao.wangtao@honor.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20250603095245.17478-1-tao.wangtao@honor.com> References: <20250603095245.17478-1-tao.wangtao@honor.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable X-ClientProxiedBy: w002.hihonor.com (10.68.28.120) To a010.hihonor.com (10.68.16.52) Memory files can optimize copy performance via copy_file_range callbacks: -Compared to mmap&read: reduces GUP (get_user_pages) overhead -Compared to sendfile/splice: eliminates one memory copy -Supports dma-buf direct I/O zero-copy implementation Suggested-by: Christian K=C3=B6nig Suggested-by: Amir Goldstein Signed-off-by: wangtao --- fs/read_write.c | 64 +++++++++++++++++++++++++++++++++++++--------- include/linux/fs.h | 2 ++ 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index bb0ed26a0b3a..ecb4f753c632 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1469,6 +1469,31 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int,= in_fd, } #endif =20 +static const struct file_operations *memory_copy_file_ops( + struct file *file_in, struct file *file_out) +{ + if ((file_in->f_op->fop_flags & FOP_MEMORY_FILE) && + (file_in->f_mode & FMODE_CAN_ODIRECT) && + file_in->f_op->copy_file_range && file_out->f_op->write_iter) + return file_in->f_op; + else if ((file_out->f_op->fop_flags & FOP_MEMORY_FILE) && + (file_out->f_mode & FMODE_CAN_ODIRECT) && + file_in->f_op->read_iter && file_out->f_op->copy_file_range) + return
file_out->f_op; + else + return NULL; +} + +static int essential_file_rw_checks(struct file *file_in, struct file *fil= e_out) +{ + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND)) + return -EBADF; + + return 0; +} + /* * Performs necessary checks before doing a file copy * @@ -1484,9 +1509,16 @@ static int generic_copy_file_checks(struct file *fil= e_in, loff_t pos_in, struct inode *inode_out =3D file_inode(file_out); uint64_t count =3D *req_count; loff_t size_in; + bool splice =3D flags & COPY_FILE_SPLICE; + const struct file_operations *mem_fops; int ret; =20 - ret =3D generic_file_rw_checks(file_in, file_out); + /* The dma-buf file is not a regular file. */ + mem_fops =3D memory_copy_file_ops(file_in, file_out); + if (splice || mem_fops =3D=3D NULL) + ret =3D generic_file_rw_checks(file_in, file_out); + else + ret =3D essential_file_rw_checks(file_in, file_out); if (ret) return ret; =20 @@ -1500,8 +1532,10 @@ static int generic_copy_file_checks(struct file *fil= e_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */ - if (flags & COPY_FILE_SPLICE) { + if (splice) { /* cross sb splice is allowed */ + } else if (mem_fops !=3D NULL) { + /* cross-fs copy is allowed for memory file. 
*/ } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range !=3D file_out->f_op->copy_file_range) @@ -1554,6 +1588,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, lof= f_t pos_in, ssize_t ret; bool splice =3D flags & COPY_FILE_SPLICE; bool samesb =3D file_inode(file_in)->i_sb =3D=3D file_inode(file_out)->i_= sb; + const struct file_operations *mem_fops; =20 if (flags & ~COPY_FILE_SPLICE) return -EINVAL; @@ -1574,18 +1609,27 @@ ssize_t vfs_copy_file_range(struct file *file_in, l= off_t pos_in, if (len =3D=3D 0) return 0; =20 + if (splice) + goto do_splice; + file_start_write(file_out); =20 /* * Cloning is supported by more file systems, so we implement copy on * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. + * For copy to/from memory file, we always call the copy method of the + * memory file. */ - if (!splice && file_out->f_op->copy_file_range) { + mem_fops =3D memory_copy_file_ops(file_in, file_out); + if (mem_fops) { + ret =3D mem_fops->copy_file_range(file_in, pos_in, + file_out, pos_out, len, flags); + } else if (file_out->f_op->copy_file_range) { ret =3D file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); - } else if (!splice && file_in->f_op->remap_file_range && samesb) { + file_out, pos_out, + len, flags); + } else if (file_in->f_op->remap_file_range && samesb) { ret =3D file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), @@ -1603,6 +1647,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, lof= f_t pos_in, if (!splice) goto done; =20 +do_splice: /* * We can get here for same sb copy of filesystems that do not implement * ->copy_file_range() in case filesystem does not support clone or in @@ -1786,12 +1831,7 @@ int generic_file_rw_checks(struct file *file_in, str= uct file *file_out) if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) return -EINVAL; 
=20 - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - return 0; + return essential_file_rw_checks(file_in, file_out); } =20 int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter) diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..37df1b497418 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2187,6 +2187,8 @@ struct file_operations { #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ #define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +/* Supports cross-FS copy_file_range for memory file */ +#define FOP_MEMORY_FILE ((__force fop_flags_t)(1 << 8)) =20 /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, --=20 2.17.1 From nobody Mon Feb 9 01:23:45 2026 Received: from mta21.hihonor.com (mta21.honor.com [81.70.160.142]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1AE7C28137C; Tue, 3 Jun 2025 09:53:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=81.70.160.142 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944434; cv=none; b=gARgs5RlXsXDkQZXcFQuqjtdwhlu3TeePc3p+8tjyXuFkhnc/cp8Z3ag25awHGT8nCrEs2VNWsWFIJimNSYyJRdRbXrWQbOeF0cm5bNiGRTcMRcUS+DNaEZ37PhqxS5gSIqc0YdnNPYo22RiOTc11aTujgzHOqDP2nrNz/HZ0fE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944434; c=relaxed/simple; bh=EivE7ISi+0WG5ukbJcn5823I3TbKyL4AilNB3tIDUZY=; h=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=OQUEOglEC/ID70o1LbOhbREsLBfslpHQlscxifEG2g1VQklZqXRzQ2P9Di1WVru2fD6c9PCs4iK8u6e1uLFog5quUJWDftGpK6dcfhREfnEIJwiCpsBMWI2GtdxZRBST5MWC3RVcEnhpGRxTKpJdlUl4nKsqFLhdvXC7/mDJkkc= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com; spf=pass smtp.mailfrom=honor.com; arc=none smtp.client-ip=81.70.160.142 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=honor.com Received: from w001.hihonor.com (unknown [10.68.25.235]) by mta21.hihonor.com (SkyGuard) with ESMTPS id 4bBQwN2MZ8zYlSxZ; Tue, 3 Jun 2025 17:51:48 +0800 (CST) Received: from a010.hihonor.com (10.68.16.52) by w001.hihonor.com (10.68.25.235) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 Received: from localhost.localdomain (10.144.18.117) by a010.hihonor.com (10.68.16.52) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 From: wangtao To: , , , , , , , , CC: , , , , , , , , , , , , , , , , wangtao Subject: [PATCH v4 2/4] dmabuf: Implement copy_file_range callback for dmabuf direct I/O prep Date: Tue, 3 Jun 2025 17:52:43 +0800 Message-ID: <20250603095245.17478-3-tao.wangtao@honor.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20250603095245.17478-1-tao.wangtao@honor.com> References: <20250603095245.17478-1-tao.wangtao@honor.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-ClientProxiedBy: w002.hihonor.com (10.68.28.120) To a010.hihonor.com (10.68.16.52) Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" First determine if dmabuf reads from or writes to the file. Then call exporter's rw_file callback function. 
Signed-off-by: wangtao --- drivers/dma-buf/dma-buf.c | 32 ++++++++++++++++++++++++++++++++ include/linux/dma-buf.h | 16 ++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 5baa83b85515..fc9bf54c921a 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -523,7 +523,38 @@ static void dma_buf_show_fdinfo(struct seq_file *m, st= ruct file *file) spin_unlock(&dmabuf->name_lock); } =20 +static ssize_t dma_buf_rw_file(struct dma_buf *dmabuf, loff_t my_pos, + struct file *file, loff_t pos, size_t count, bool is_write) +{ + if (!dmabuf->ops->rw_file) + return -EINVAL; + + if (my_pos >=3D dmabuf->size) + count =3D 0; + else + count =3D min_t(size_t, count, dmabuf->size - my_pos); + if (!count) + return 0; + + return dmabuf->ops->rw_file(dmabuf, my_pos, file, pos, count, is_write); +} + +static ssize_t dma_buf_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) +{ + if (is_dma_buf_file(file_in) && file_out->f_op->write_iter) + return dma_buf_rw_file(file_in->private_data, pos_in, + file_out, pos_out, count, true); + else if (is_dma_buf_file(file_out) && file_in->f_op->read_iter) + return dma_buf_rw_file(file_out->private_data, pos_out, + file_in, pos_in, count, false); + else + return -EINVAL; +} + static const struct file_operations dma_buf_fops =3D { + .fop_flags =3D FOP_MEMORY_FILE, .release =3D dma_buf_file_release, .mmap =3D dma_buf_mmap_internal, .llseek =3D dma_buf_llseek, @@ -531,6 +562,7 @@ static const struct file_operations dma_buf_fops =3D { .unlocked_ioctl =3D dma_buf_ioctl, .compat_ioctl =3D compat_ptr_ioctl, .show_fdinfo =3D dma_buf_show_fdinfo, + .copy_file_range =3D dma_buf_copy_file_range, }; =20 /* diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..d3636e985399 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -22,6 +22,7 @@ #include 
#include #include +#include =20 struct device; struct dma_buf; @@ -285,6 +286,21 @@ struct dma_buf_ops { =20 int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); + + /** + * @rw_file: + * + * If an Exporter needs to support Direct I/O file operations, it can + * implement this optional callback. The exporter must verify that no + * other objects hold the sg_table, ensure exclusive access to the + * dmabuf's sg_table, and only then proceed with the I/O operation. + * + * Returns: + * + * Bytes transferred on success or a negative error code on failure. + */ + ssize_t (*rw_file)(struct dma_buf *dmabuf, loff_t my_pos, + struct file *file, loff_t pos, size_t count, bool is_write); }; =20 /** --=20 2.17.1 From nobody Mon Feb 9 01:23:45 2026 Received: from mta20.hihonor.com (mta20.honor.com [81.70.206.69]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DAC0727FD5D; Tue, 3 Jun 2025 09:53:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=81.70.206.69 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944442; cv=none; b=PmzZ7JKV0FZDq9htPasnx6MftpgN35KuuetExF5T9V+CP9yFIFsVPftfE0LdTzEjYdttTkue3hx4z+Hlen0sbwY6sKPH+f66MNUWbh4N4UOw+XyTM1uC4IQa85h/rnRIg25csx1fs0/loCpo8OH4hB5KwUGassZYe23d5yHT6K4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944442; c=relaxed/simple; bh=Y++8qSubKRI1uL5yLvAVlryoFONB0hDV8WuLRPeWXZY=; h=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=DgwmY83nFo9AM4PSGAOj8SdHc0GlHHwY1cxTu+iTt6JOBGdueNFY70KsDoNYvb+Qxe6o2cQG+I/qhhgmxVkJyhxQHr03DVzgwodUuAqBC+r/Ibd4VAlPFAfnevKdt4fTdr2kuCXHW30veov8owqzvPkcG4ZzZpanFlxHRaX78Fg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com; spf=pass smtp.mailfrom=honor.com; 
arc=none smtp.client-ip=81.70.206.69 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=honor.com Received: from w012.hihonor.com (unknown [10.68.27.189]) by mta20.hihonor.com (SkyGuard) with ESMTPS id 4bBQw571wWzYlP63; Tue, 3 Jun 2025 17:51:33 +0800 (CST) Received: from a010.hihonor.com (10.68.16.52) by w012.hihonor.com (10.68.27.189) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 Received: from localhost.localdomain (10.144.18.117) by a010.hihonor.com (10.68.16.52) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 From: wangtao To: , , , , , , , , CC: , , , , , , , , , , , , , , , , wangtao Subject: [PATCH v4 3/4] udmabuf: Implement udmabuf direct I/O Date: Tue, 3 Jun 2025 17:52:44 +0800 Message-ID: <20250603095245.17478-4-tao.wangtao@honor.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20250603095245.17478-1-tao.wangtao@honor.com> References: <20250603095245.17478-1-tao.wangtao@honor.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-ClientProxiedBy: w002.hihonor.com (10.68.28.120) To a010.hihonor.com (10.68.16.52) Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Construct bio_vec from folios, then call the other file's r/w callbacks for IO operations. Test data shows direct I/O copy_file_range improves performance by over 50% vs direct I/O mmap&read (2557 vs 1534). 
Test data: | 32x32MB Read 1024MB |Creat-ms|Close-ms| I/O-ms|I/O-MB/s| I/O% |-------------------------|--------|--------|--------|--------|----- | 1)Beg udmabuf buffer R/W| 580 | 323 | 1238 | 867 | 100% | 2) dmabuf buffer R/W| 48 | 5 | 1149 | 934 | 107% | 3) udma+memfd buffer R/W| 597 | 340 | 2157 | 497 | 57% | 4) udma+memfd direct R/W| 573 | 340 | 700 | 1534 | 176% | 5) u+mfd buffer sendfile| 577 | 340 | 1204 | 891 | 102% | 6) u+mfd direct sendfile| 567 | 339 | 2272 | 472 | 54% | 7) u+mfd buffer splice| 570 | 337 | 1114 | 964 | 111% | 8) u+mfd direct splice| 564 | 335 | 793 | 1355 | 156% | 9) udmabuf buffer c_f_r| 577 | 323 | 1059 | 1014 | 116% |10) udmabuf direct c_f_r| 582 | 325 | 420 | 2557 | 294% |11)End udmabuf buffer R/W| 586 | 323 | 1188 | 903 | 104% Signed-off-by: wangtao --- drivers/dma-buf/udmabuf.c | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index e74e36a8ecda..511567b15340 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include =20 static int list_limit =3D 1024; module_param(list_limit, int, 0644); @@ -284,6 +286,55 @@ static int end_cpu_udmabuf(struct dma_buf *buf, return 0; } =20 +static ssize_t udmabuf_rw_file(struct dma_buf *dmabuf, loff_t my_pos, + struct file *other, loff_t pos, + size_t count, bool is_write) +{ + struct udmabuf *ubuf =3D dmabuf->priv; + loff_t my_end =3D my_pos + count, bv_beg, bv_end =3D 0; + size_t i, bv_off, bv_len, bv_idx =3D 0; + struct bio_vec *bvec; + struct kiocb kiocb; + struct iov_iter iter; + unsigned int direction =3D is_write ? 
ITER_SOURCE : ITER_DEST; + ssize_t ret =3D 0; + struct folio *folio; + + bvec =3D kvcalloc(ubuf->nr_pinned, sizeof(*bvec), GFP_KERNEL); + if (!bvec) + return -ENOMEM; + + init_sync_kiocb(&kiocb, other); + kiocb.ki_pos =3D pos; + + for (i =3D 0; i < ubuf->nr_pinned; i++) { + folio =3D ubuf->pinned_folios[i]; + bv_beg =3D bv_end; + if (bv_beg >=3D my_end) + break; + bv_end +=3D folio_size(folio); + if (bv_end <=3D my_pos) + continue; + + bv_len =3D min(bv_end, my_end) - max(my_pos, bv_beg); + bv_off =3D my_pos > bv_beg ? my_pos - bv_beg : 0; + bvec_set_page(&bvec[bv_idx], &folio->page, bv_len, bv_off); + ++bv_idx; + } + + if (bv_idx > 0) { + /* start R/W. */ + iov_iter_bvec(&iter, direction, bvec, bv_idx, count); + if (is_write) + ret =3D other->f_op->write_iter(&kiocb, &iter); + else + ret =3D other->f_op->read_iter(&kiocb, &iter); + } + kvfree(bvec); + + return ret; +} + static const struct dma_buf_ops udmabuf_ops =3D { .cache_sgt_mapping =3D true, .map_dma_buf =3D map_udmabuf, @@ -294,6 +345,7 @@ static const struct dma_buf_ops udmabuf_ops =3D { .vunmap =3D vunmap_udmabuf, .begin_cpu_access =3D begin_cpu_udmabuf, .end_cpu_access =3D end_cpu_udmabuf, + .rw_file =3D udmabuf_rw_file, }; =20 #define SEALS_WANTED (F_SEAL_SHRINK) @@ -455,6 +507,8 @@ static long udmabuf_create(struct miscdevice *device, ret =3D PTR_ERR(dmabuf); goto err; } + /* Support direct I/O */ + dmabuf->file->f_mode |=3D FMODE_CAN_ODIRECT; /* * Ownership of ubuf is held by the dmabuf from here. 
* If the following dma_buf_fd() fails, dma_buf_put() cleans up both the --=20 2.17.1 From nobody Mon Feb 9 01:23:45 2026 Received: from mta22.hihonor.com (mta22.hihonor.com [81.70.192.198]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5DB7F2820CE; Tue, 3 Jun 2025 09:53:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=81.70.192.198 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944436; cv=none; b=Mh4x6Lf8BagKM4omvJChBrk8fwkkemQC+GIcu/BVFKo1ECtDa8jypyAgBMYmWw9cZLUz43D0I5VhV2Kdey5JXma3U89ANWWpOKH79LE2FxWSCzlEZGHIem5bzacfZQwBHYzz9G2H4bTFXSCjEQZjrHBUW1UN2HIg1GQOc+U2eKY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748944436; c=relaxed/simple; bh=dbx+BVRt7GsCl5PL/AnfI2ytrlqI6ooWiB7QuFeZT5M=; h=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=DE2XwJE6Mmf7Me2l92bi0EaFhN7znhKKPaRrHaXG2c1ABXDP8w5IQ7r2eNGFRwA2zWvZjQkcaODpdRd8LJABJIjljacqA9VR+hH6pDT2YOuwH8s+yR2OQ07i3ww9SMXQox4uLakNs6tX+U9xDkZ7LOMd4SLIxh5LOL+SQ4uc2oE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com; spf=pass smtp.mailfrom=honor.com; arc=none smtp.client-ip=81.70.192.198 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=honor.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=honor.com Received: from w011.hihonor.com (unknown [10.68.20.122]) by mta22.hihonor.com (SkyGuard) with ESMTPS id 4bBQwW0q9hzYl8XY; Tue, 3 Jun 2025 17:51:55 +0800 (CST) Received: from a010.hihonor.com (10.68.16.52) by w011.hihonor.com (10.68.20.122) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:51 +0800 Received: from localhost.localdomain (10.144.18.117) by a010.hihonor.com 
(10.68.16.52) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1544.11; Tue, 3 Jun 2025 17:53:50 +0800 From: wangtao To: , , , , , , , , CC: , , , , , , , , , , , , , , , , wangtao Subject: [PATCH v4 4/4] dmabuf:system_heap Implement system_heap dmabuf direct I/O Date: Tue, 3 Jun 2025 17:52:45 +0800 Message-ID: <20250603095245.17478-5-tao.wangtao@honor.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20250603095245.17478-1-tao.wangtao@honor.com> References: <20250603095245.17478-1-tao.wangtao@honor.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-ClientProxiedBy: w002.hihonor.com (10.68.28.120) To a010.hihonor.com (10.68.16.52) Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" First verify system_heap exporter has exclusive dmabuf access. Build bio_vec from sgtable, then invoke target file's r/w callbacks for IO. Outperforms buffer IO mmap/read by 250%, beats direct I/O udmabuf copy_file_range by over 30% with initialization time significantly lower than udmabuf. 
Test data: | 32x32MB Read 1024MB |Creat-ms|Close-ms| I/O-ms|I/O-MB/s| I/O% |-------------------------|--------|--------|--------|--------|----- | 1)Beg dmabuf buffer R/W| 47 | 5 | 1125 | 954 | 100% | 2) udmabuf buffer R/W| 576 | 323 | 1228 | 874 | 91% | 3) udma+memfd buffer R/W| 596 | 340 | 2166 | 495 | 51% | 4) udma+memfd direct R/W| 570 | 338 | 711 | 1510 | 158% | 5) udmabuf buffer c_f_r| 578 | 329 | 1128 | 952 | 99% | 6) udmabuf direct c_f_r| 570 | 324 | 405 | 2651 | 277% | 7) dmabuf buffer c_f_r| 47 | 5 | 1035 | 1037 | 108% | 8) dmabuf direct c_f_r| 51 | 5 | 309 | 3480 | 364% | 9)End dmabuf buffer R/W| 48 | 5 | 1153 | 931 | 97% | 32x32MB Write 1024MB |Creat-ms|Close-ms| I/O-ms|I/O-MB/s| I/O% |-------------------------|--------|--------|--------|--------|----- | 1)Beg dmabuf buffer R/W| 50 | 5 | 1405 | 764 | 100% | 2) udmabuf buffer R/W| 580 | 341 | 1337 | 803 | 105% | 3) udma+memfd buffer R/W| 588 | 331 | 1820 | 590 | 77% | 4) udma+memfd direct R/W| 585 | 333 | 662 | 1622 | 212% | 5) udmabuf buffer c_f_r| 577 | 329 | 1326 | 810 | 106% | 6) udmabuf direct c_f_r| 580 | 330 | 602 | 1784 | 233% | 7) dmabuf buffer c_f_r| 49 | 5 | 1330 | 807 | 105% | 8) dmabuf direct c_f_r| 49 | 5 | 344 | 3127 | 409% | 9)End dmabuf buffer R/W| 50 | 5 | 1442 | 745 | 97% Signed-off-by: wangtao --- drivers/dma-buf/heaps/system_heap.c | 69 +++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/sy= stem_heap.c index 26d5dc89ea16..85ffff7ef855 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include =20 static struct dma_heap *sys_heap; =20 @@ -281,6 +283,70 @@ static void system_heap_vunmap(struct dma_buf *dmabuf,= struct iosys_map *map) iosys_map_clear(map); } =20 +static ssize_t system_heap_buffer_rw_other(struct system_heap_buffer *buff= er, + loff_t my_pos, struct file *other, loff_t pos, + size_t count, 
bool is_write) +{ + struct sg_table *sgt =3D &buffer->sg_table; + struct scatterlist *sg; + loff_t my_end =3D my_pos + count, bv_beg, bv_end =3D 0; + size_t i, bv_off, bv_len, bv_idx =3D 0; + struct bio_vec *bvec; + struct kiocb kiocb; + struct iov_iter iter; + unsigned int direction =3D is_write ? ITER_SOURCE : ITER_DEST; + ssize_t ret =3D 0; + + bvec =3D kvcalloc(sgt->orig_nents, sizeof(*bvec), GFP_KERNEL); + if (!bvec) + return -ENOMEM; + + init_sync_kiocb(&kiocb, other); + kiocb.ki_pos =3D pos; + + for_each_sgtable_sg(sgt, sg, i) { + bv_beg =3D bv_end; + if (bv_beg >=3D my_end) + break; + bv_end +=3D sg->offset + sg->length; + if (bv_end <=3D my_pos) + continue; + + bv_len =3D min(bv_end, my_end) - max(my_pos, bv_beg); + bv_off =3D sg->offset + (my_pos > bv_beg ? my_pos - bv_beg : 0); + bvec_set_page(&bvec[bv_idx], sg_page(sg), bv_len, bv_off); + ++bv_idx; + } + + if (bv_idx > 0) { + /* start R/W. */ + iov_iter_bvec(&iter, direction, bvec, bv_idx, count); + if (is_write) + ret =3D other->f_op->write_iter(&kiocb, &iter); + else + ret =3D other->f_op->read_iter(&kiocb, &iter); + } + kvfree(bvec); + + return ret; +} + +static ssize_t system_heap_dma_buf_rw_file(struct dma_buf *dmabuf, + loff_t my_pos, struct file *file, loff_t pos, + size_t count, bool is_write) +{ + struct system_heap_buffer *buffer =3D dmabuf->priv; + ssize_t ret =3D -EBUSY; + + mutex_lock(&buffer->lock); + if (list_empty(&buffer->attachments) && !buffer->vmap_cnt) + ret =3D system_heap_buffer_rw_other(buffer, my_pos, + file, pos, count, is_write); + mutex_unlock(&buffer->lock); + + return ret; +} + static void system_heap_dma_buf_release(struct dma_buf *dmabuf) { struct system_heap_buffer *buffer =3D dmabuf->priv; @@ -308,6 +374,7 @@ static const struct dma_buf_ops system_heap_buf_ops =3D= { .mmap =3D system_heap_mmap, .vmap =3D system_heap_vmap, .vunmap =3D system_heap_vunmap, + .rw_file =3D system_heap_dma_buf_rw_file, .release =3D system_heap_dma_buf_release, }; =20 @@ -400,6 +467,8 @@ 
static struct dma_buf *system_heap_allocate(struct dma_= heap *heap, ret =3D PTR_ERR(dmabuf); goto free_pages; } + /* Support direct I/O */ + dmabuf->file->f_mode |=3D FMODE_CAN_ODIRECT; return dmabuf; =20 free_pages: --=20 2.17.1