From Adrien Delorme
> From: David Ahern
> Sent: 12 April 2023 7:39
> > Sent: 11 April 2023 16:28
> ....
> > Christoph's patch set a few years back that removed set_fs broke the
> > ability to do in-kernel ioctl and {s,g}setsockopt calls. I did not
> > follow that change; was it a deliberate intent to not allow these
> > in-kernel calls vs wanting to remove the set_fs? e.g., can we add a
> > kioctl variant for in-kernel use of the APIs?
>
> I think that was a side effect, and with no in-tree in-kernel
> users (apart from limited calls in bpf) it was deemed acceptable.
> (It is a PITA for any code trying to use SCTP in kernel.)
>
> One problem is that not all sockopt calls pass the correct length.
> And some of them can have very long buffers.
> Not to mention the ones that are read-modify-write.
>
> A plausible solution is to pass a 'fat pointer' that contains
> some, or all, of:
> - A userspace buffer pointer.
> - A kernel buffer pointer.
> - The length supplied by the user.
> - The length of the kernel buffer.
> = The number of bytes to copy on completion.
> For simple user requests the syscall entry/exit code
> would copy the data to a short on-stack buffer.
> Kernel users just pass the kernel address.
> Odd requests can just use the user pointer.
>
> Probably needs accessors that add in an offset.
>
> It might also be that some of the problematic sockopt
> were in decnet - now removed.
Hello everyone,
I'm currently working on an implementation of {get,set} sockopt.
Since this thread is already talking about it, I hope that I replying at the correct place.
My implementation is rather simple using a struct that will be used to pass the necessary info throught sqe->cmd.
Here is my implementation based of kernel version 6.3 :
Signed-off-by: Adrien Delorme <delorme.ade@outlook.com>
diff -uprN a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
--- a/include/uapi/linux/io_uring.h 2023-04-23 15:02:52.000000000 -0400
+++ b/include/uapi/linux/io_uring.h 2023-04-24 07:55:21.406981696 -0400
@@ -235,6 +235,25 @@ enum io_uring_op {
*/
#define IORING_URING_CMD_FIXED (1U << 0)
+/* struct io_uring_cmd->cmd_op flags for socket operations */
+#define IO_URING_CMD_OP_GETSOCKOPT 0x0
+#define IO_URING_CMD_OP_SETSOCKOPT 0x1
+
+/* Struct to pass args for IO_URING_CMD_OP_GETSOCKOPT and IO_URING_CMD_OP_SETSOCKOPT operations */
+struct args_setsockopt_uring{
+ int level;
+ int optname;
+ char __user * user_optval;
+ int optlen;
+};
+
+struct args_getsockopt_uring{
+ int level;
+ int optname;
+ char __user * user_optval;
+ int __user * optlen;
+};
+
/*
* sqe->fsync_flags
diff -uprN a/net/socket.c b/net/socket.c
--- a/net/socket.c 2023-04-23 15:02:52.000000000 -0400
+++ b/net/socket.c 2023-04-24 08:06:44.800981696 -0400
@@ -108,6 +108,11 @@
#include <linux/ptp_clock_kernel.h>
#include <trace/events/sock.h>
+#ifdef CONFIG_IO_URING
+#include <uapi/linux/io_uring.h>
+#include <linux/io_uring.h>
+#endif
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
@@ -132,6 +137,11 @@ static ssize_t sock_splice_read(struct f
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+
+#ifdef CONFIG_IO_URING
+int socket_uring_cmd_handler(struct io_uring_cmd *cmd, unsigned int flags);
+#endif
+
#ifdef CONFIG_PROC_FS
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
@@ -166,6 +176,9 @@ static const struct file_operations sock
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
.show_fdinfo = sock_show_fdinfo,
+#ifdef CONFIG_IO_URING
+ .uring_cmd = socket_uring_cmd_handler,
+#endif
};
static const char * const pf_family_names[] = {
@@ -2330,6 +2343,126 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int
return __sys_getsockopt(fd, level, optname, optval, optlen);
}
+#ifdef CONFIG_IO_URING
+
+/*
+ * Make getsockopt operation with io_uring.
+ * This fonction is based of the __sys_getsockopt without sockfd_lookup_light
+ * since io_uring retrieves it for us.
+ */
+int uring_cmd_getsockopt(struct socket *sock, int level, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int err;
+ int max_optlen;
+
+ err = security_socket_getsockopt(sock, level, optname);
+ if (err)
+ goto out_put;
+
+ if (!in_compat_syscall())
+ max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+
+ if (level == SOL_SOCKET)
+ err = sock_getsockopt(sock, level, optname, optval, optlen);
+ else if (unlikely(!sock->ops->getsockopt))
+ err = -EOPNOTSUPP;
+ else
+ err = sock->ops->getsockopt(sock, level, optname, optval,
+ optlen);
+
+ if (!in_compat_syscall())
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
+ optval, optlen, max_optlen,
+ err);
+out_put:
+ return err;
+}
+
+/*
+ * Make setsockopt operation with io_uring.
+ * This fonction is based of the __sys_setsockopt without sockfd_lookup_light
+ * since io_uring retrieves it for us.
+ */
+int uring_cmd_setsockopt(struct socket *sock, int level, int optname, char *user_optval,
+ int optlen)
+{
+ sockptr_t optval = USER_SOCKPTR(user_optval);
+ char *kernel_optval = NULL;
+ int err;
+
+ if (optlen < 0)
+ return -EINVAL;
+
+ err = security_socket_setsockopt(sock, level, optname);
+ if (err)
+ goto out_put;
+
+ if (!in_compat_syscall())
+ err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
+ user_optval, &optlen,
+ &kernel_optval);
+ if (err < 0)
+ goto out_put;
+ if (err > 0) {
+ err = 0;
+ goto out_put;
+ }
+
+ if (kernel_optval)
+ optval = KERNEL_SOCKPTR(kernel_optval);
+ if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
+ err = sock_setsockopt(sock, level, optname, optval, optlen);
+ else if (unlikely(!sock->ops->setsockopt))
+ err = -EOPNOTSUPP;
+ else
+ err = sock->ops->setsockopt(sock, level, optname, optval,
+ optlen);
+ kfree(kernel_optval);
+out_put:
+ return err;
+}
+
+/*
+ * Handler uring_cmd socket file_operations.
+ *
+ * Operation code and struct are defined in /include/uapi/linux/io_uring.h
+ * The io_uring ring needs to be set with the flags : IORING_SETUP_SQE128 and IORING_SETUP_CQE32
+ *
+ */
+int socket_uring_cmd_handler(struct io_uring_cmd *cmd, unsigned int flags){
+
+ /* Retrieve socket */
+ struct socket *sock = sock_from_file(cmd->file);
+
+ if (!sock)
+ return -EINVAL;
+
+ /* Does the requested operation */
+ switch (cmd->cmd_op) {
+ case IO_URING_CMD_OP_GETSOCKOPT:
+ struct args_getsockopt_uring *values_get = (struct args_getsockopt_uring *) cmd->cmd;
+ return uring_cmd_getsockopt(sock,
+ values_get->level,
+ values_get->optname,
+ values_get->user_optval,
+ values_get->optlen);
+
+ case IO_URING_CMD_OP_SETSOCKOPT:
+ struct args_setsockopt_uring *values_set = (struct args_setsockopt_uring *) cmd->cmd;
+ return uring_cmd_setsockopt(sock,
+ values_set->level,
+ values_set->optname,
+ values_set->user_optval,
+ values_set->optlen);
+ default:
+ break;
+
+ }
+ return -EINVAL;
+}
+#endif
+
/*
* Shutdown a socket.
*/
I would appreciate any feedback or advice you may have on this work. Hopefully it will be of some kind of help. Thank you for your time and consideration.
Adrien
On 5/2/23 10:21, Adrien Delorme wrote: > From Adrien Delorme > >> From: David Ahern >> Sent: 12 April 2023 7:39 >>> Sent: 11 April 2023 16:28 >> .... >>> Christoph's patch set a few years back that removed set_fs broke the >>> ability to do in-kernel ioctl and {s,g}setsockopt calls. I did not >>> follow that change; was it a deliberate intent to not allow these >>> in-kernel calls vs wanting to remove the set_fs? e.g., can we add a >>> kioctl variant for in-kernel use of the APIs? >> >> I think that was a side effect, and with no in-tree in-kernel >> users (apart from limited calls in bpf) it was deemed acceptable. >> (It is a PITA for any code trying to use SCTP in kernel.) >> >> One problem is that not all sockopt calls pass the correct length. >> And some of them can have very long buffers. >> Not to mention the ones that are read-modify-write. >> >> A plausible solution is to pass a 'fat pointer' that contains >> some, or all, of: >> - A userspace buffer pointer. >> - A kernel buffer pointer. >> - The length supplied by the user. >> - The length of the kernel buffer. >> = The number of bytes to copy on completion. >> For simple user requests the syscall entry/exit code >> would copy the data to a short on-stack buffer. >> Kernel users just pass the kernel address. >> Odd requests can just use the user pointer. >> >> Probably needs accessors that add in an offset. >> >> It might also be that some of the problematic sockopt >> were in decnet - now removed. > > Hello everyone, > > I'm currently working on an implementation of {get,set} sockopt. > Since this thread is already talking about it, I hope that I replying at the correct place. Hi Adrien, I believe Breno is working on set/getsockopt as well and had similar patches for awhile, but that would need for some problems to be solved first, e.g. try and decide whether it copies to a ptr as the syscall versions or would get/return optval directly in sqe/cqe. And also where to store bits that you pass in struct args_setsockopt_uring, and whether to rely on SQE128 or not. > My implementation is rather simple using a struct that will be used to pass the necessary info throught sqe->cmd. > > Here is my implementation based of kernel version 6.3 : > > Signed-off-by: Adrien Delorme <delorme.ade@outlook.com> > > diff -uprN a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > --- a/include/uapi/linux/io_uring.h 2023-04-23 15:02:52.000000000 -0400 > +++ b/include/uapi/linux/io_uring.h 2023-04-24 07:55:21.406981696 -0400 > @@ -235,6 +235,25 @@ enum io_uring_op { > */ > #define IORING_URING_CMD_FIXED (1U << 0) > > +/* struct io_uring_cmd->cmd_op flags for socket operations */ > +#define IO_URING_CMD_OP_GETSOCKOPT 0x0 > +#define IO_URING_CMD_OP_SETSOCKOPT 0x1 > + > +/* Struct to pass args for IO_URING_CMD_OP_GETSOCKOPT and IO_URING_CMD_OP_SETSOCKOPT operations */ > +struct args_setsockopt_uring{ The name of the structure is quite inconsistent with the rest. It's better to be io_[uring_]_sockopt_arg or some variation. > + int level; > + int optname; > + char __user * user_optval; > + int optlen; That's uapi, there should not be __user, and field sizes should be more portable, i.e. use __u32, __u64, etc, look through the file. Would need to look into the get/setsockopt implementation before saying anything about uring_cmd_{set,get}sockopt. > +}; > + > +struct args_getsockopt_uring{ > + int level; > + int optname; > + char __user * user_optval; > + int __user * optlen; > +}; > + > > /* > * sqe->fsync_flags > diff -uprN a/net/socket.c b/net/socket.c > --- a/net/socket.c 2023-04-23 15:02:52.000000000 -0400 > +++ b/net/socket.c 2023-04-24 08:06:44.800981696 -0400 > @@ -108,6 +108,11 @@ > #include <linux/ptp_clock_kernel.h> > #include <trace/events/sock.h> > > +#ifdef CONFIG_IO_URING > +#include <uapi/linux/io_uring.h> > +#include <linux/io_uring.h> > +#endif > + > #ifdef CONFIG_NET_RX_BUSY_POLL > unsigned int sysctl_net_busy_read __read_mostly; > unsigned int sysctl_net_busy_poll __read_mostly; > @@ -132,6 +137,11 @@ static ssize_t sock_splice_read(struct f > struct pipe_inode_info *pipe, size_t len, > unsigned int flags); > > + > +#ifdef CONFIG_IO_URING > +int socket_uring_cmd_handler(struct io_uring_cmd *cmd, unsigned int flags); > +#endif > + > #ifdef CONFIG_PROC_FS > static void sock_show_fdinfo(struct seq_file *m, struct file *f) > { > @@ -166,6 +176,9 @@ static const struct file_operations sock > .splice_write = generic_splice_sendpage, > .splice_read = sock_splice_read, > .show_fdinfo = sock_show_fdinfo, > +#ifdef CONFIG_IO_URING > + .uring_cmd = socket_uring_cmd_handler, > +#endif > }; > > static const char * const pf_family_names[] = { > @@ -2330,6 +2343,126 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int > return __sys_getsockopt(fd, level, optname, optval, optlen); > } > > +#ifdef CONFIG_IO_URING > + > +/* > + * Make getsockopt operation with io_uring. > + * This fonction is based of the __sys_getsockopt without sockfd_lookup_light > + * since io_uring retrieves it for us. > + */ > +int uring_cmd_getsockopt(struct socket *sock, int level, int optname, char __user *optval, > + int __user *optlen) > +{ > + int err; > + int max_optlen; > + > + err = security_socket_getsockopt(sock, level, optname); > + if (err) > + goto out_put; > + > + if (!in_compat_syscall()) > + max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); > + > + if (level == SOL_SOCKET) > + err = sock_getsockopt(sock, level, optname, optval, optlen); > + else if (unlikely(!sock->ops->getsockopt)) > + err = -EOPNOTSUPP; > + else > + err = sock->ops->getsockopt(sock, level, optname, optval, > + optlen); > + > + if (!in_compat_syscall()) > + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, > + optval, optlen, max_optlen, > + err); > +out_put: > + return err; > +} > + > +/* > + * Make setsockopt operation with io_uring. > + * This fonction is based of the __sys_setsockopt without sockfd_lookup_light > + * since io_uring retrieves it for us. > + */ > +int uring_cmd_setsockopt(struct socket *sock, int level, int optname, char *user_optval, > + int optlen) > +{ > + sockptr_t optval = USER_SOCKPTR(user_optval); > + char *kernel_optval = NULL; > + int err; > + > + if (optlen < 0) > + return -EINVAL; > + > + err = security_socket_setsockopt(sock, level, optname); > + if (err) > + goto out_put; > + > + if (!in_compat_syscall()) > + err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, > + user_optval, &optlen, > + &kernel_optval); > + if (err < 0) > + goto out_put; > + if (err > 0) { > + err = 0; > + goto out_put; > + } > + > + if (kernel_optval) > + optval = KERNEL_SOCKPTR(kernel_optval); > + if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) > + err = sock_setsockopt(sock, level, optname, optval, optlen); > + else if (unlikely(!sock->ops->setsockopt)) > + err = -EOPNOTSUPP; > + else > + err = sock->ops->setsockopt(sock, level, optname, optval, > + optlen); > + kfree(kernel_optval); > +out_put: > + return err; > +} > + > +/* > + * Handler uring_cmd socket file_operations. > + * > + * Operation code and struct are defined in /include/uapi/linux/io_uring.h > + * The io_uring ring needs to be set with the flags : IORING_SETUP_SQE128 and IORING_SETUP_CQE32 > + * > + */ > +int socket_uring_cmd_handler(struct io_uring_cmd *cmd, unsigned int flags){ > + > + /* Retrieve socket */ > + struct socket *sock = sock_from_file(cmd->file); > + > + if (!sock) > + return -EINVAL; > + > + /* Does the requested operation */ > + switch (cmd->cmd_op) { > + case IO_URING_CMD_OP_GETSOCKOPT: > + struct args_getsockopt_uring *values_get = (struct args_getsockopt_uring *) cmd->cmd; > + return uring_cmd_getsockopt(sock, > + values_get->level, > + values_get->optname, > + values_get->user_optval, > + values_get->optlen); > + > + case IO_URING_CMD_OP_SETSOCKOPT: > + struct args_setsockopt_uring *values_set = (struct args_setsockopt_uring *) cmd->cmd; > + return uring_cmd_setsockopt(sock, > + values_set->level, > + values_set->optname, > + values_set->user_optval, > + values_set->optlen); > + default: > + break; > + > + } > + return -EINVAL; > +} > +#endif > + > /* > * Shutdown a socket. > */ > > I would appreciate any feedback or advice you may have on this work. Hopefully it will be of some kind of help. Thank you for your time and consideration. > > Adrien -- Pavel Begunkov
From Adrien Delorme > From : Pavel Begunkov > Sent : 2 May 2023 15:04 > On 5/2/23 10:21, Adrien Delorme wrote: > > From Adrien Delorme > > > >> From: David Ahern > >> Sent: 12 April 2023 7:39 > >>> Sent: 11 April 2023 16:28 > >> .... > >> One problem is that not all sockopt calls pass the correct length. > >> And some of them can have very long buffers. > >> Not to mention the ones that are read-modify-write. > >> > >> A plausible solution is to pass a 'fat pointer' that contains some, > >> or all, of: > >> - A userspace buffer pointer. > >> - A kernel buffer pointer. > >> - The length supplied by the user. > >> - The length of the kernel buffer. > >> = The number of bytes to copy on completion. > >> For simple user requests the syscall entry/exit code would copy the > >> data to a short on-stack buffer. > >> Kernel users just pass the kernel address. > >> Odd requests can just use the user pointer. > >> > >> Probably needs accessors that add in an offset. > >> > >> It might also be that some of the problematic sockopt were in decnet > >> - now removed. > > > > Hello everyone, > > > > I'm currently working on an implementation of {get,set} sockopt. > > Since this thread is already talking about it, I hope that I replying at the > correct place. > > Hi Adrien, I believe Breno is working on set/getsockopt as well and had > similar patches for awhile, but that would need for some problems to be > solved first, e.g. try and decide whether it copies to a ptr as the syscall > versions or would get/return optval directly in sqe/cqe. And also where to > store bits that you pass in struct args_setsockopt_uring, and whether to rely > on SQE128 or not. > Hello Pavel, That is good to hear. If possible I would like to provide some help. I looked at the getsockopt implementation. From what I'm seeing, I believe that it would be easier to copies to a ptr as the syscall. The length of the output is usually 4 bytes (sometimes less) but in a lot of cases, this length is variable. Sometime it can even be bigger that the SQE128 ring. Here is a non-exhaustive list of those cases : /net/ipv4/tcp.c : int do_tcp_getsockopt(...) - TCP_INFO : up to 240 bytes - TCP_CC_INFO and TCP_REPAIR_WINDOW : up to 20 bytes - TCP_CONGESTION and TCP_ULP : up to 16 bytes - TCP_ZEROCPOY_RECEIVE : up to 64 bytes /net/atm/commun.c : int vcc_getsockopt(...) - SO_ATMQOS : up to 88 bytes - SO_ATMPVC : up to 16 bytes /net/ipv4/io_sockglue.c : int do_ip_getsockopt(...) - MCAST_MSFILTER : up to 144 bytes - IP_MSFILTER : 16 bytes minimum I will look into setsockopt but I believe it might be the same. If needed I can also complete this list. However there are some cases where it is hard to determinate a maximum amount of bytes in advance. As to where the bytes should be stored I was thinking of either : - As a pointer in sqe->addr so the SQE128 is not needed - In sqe->cmd as a struct but from my understanding, the SQE128 is needed > > > My implementation is rather simple using a struct that will be used to pass > the necessary info throught sqe->cmd. > > > > Here is my implementation based of kernel version 6.3 : > > ... > > +/* Struct to pass args for IO_URING_CMD_OP_GETSOCKOPT and > > +IO_URING_CMD_OP_SETSOCKOPT operations */ struct > > +args_setsockopt_uring{ > > The name of the structure is quite inconsistent with the rest. It's better to be > io_[uring_]_sockopt_arg or some variation. > > > + int level; > > + int optname; > > + char __user * user_optval; > > + int optlen; > > That's uapi, there should not be __user, and field sizes should be more > portable, i.e. use __u32, __u64, etc, look through the file. > > Would need to look into the get/setsockopt implementation before saying > anything about uring_cmd_{set,get}sockopt. > ... > Pavel Begunkov Thank you for the review. Adrien Delorme --
From: Adrien Delorme > Sent: 03 May 2023 14:11 > > From Adrien Delorme > > From : Pavel Begunkov > > Sent : 2 May 2023 15:04 > > On 5/2/23 10:21, Adrien Delorme wrote: > > > From Adrien Delorme > > > > > >> From: David Ahern > > >> Sent: 12 April 2023 7:39 > > >>> Sent: 11 April 2023 16:28 > > >> .... > > >> One problem is that not all sockopt calls pass the correct length. > > >> And some of them can have very long buffers. > > >> Not to mention the ones that are read-modify-write. > > >> > > >> A plausible solution is to pass a 'fat pointer' that contains some, > > >> or all, of: > > >> - A userspace buffer pointer. > > >> - A kernel buffer pointer. > > >> - The length supplied by the user. > > >> - The length of the kernel buffer. > > >> = The number of bytes to copy on completion. > > >> For simple user requests the syscall entry/exit code would copy the > > >> data to a short on-stack buffer. > > >> Kernel users just pass the kernel address. > > >> Odd requests can just use the user pointer. > > >> > > >> Probably needs accessors that add in an offset. > > >> > > >> It might also be that some of the problematic sockopt were in decnet > > >> - now removed. > > > > > > Hello everyone, > > > > > > I'm currently working on an implementation of {get,set} sockopt. > > > Since this thread is already talking about it, I hope that I replying at the > > correct place. > > > > Hi Adrien, I believe Breno is working on set/getsockopt as well and had > > similar patches for awhile, but that would need for some problems to be > > solved first, e.g. try and decide whether it copies to a ptr as the syscall > > versions or would get/return optval directly in sqe/cqe. And also where to > > store bits that you pass in struct args_setsockopt_uring, and whether to rely > > on SQE128 or not. > > > > Hello Pavel, > That is good to hear. If possible I would like to provide some help. > I looked at the getsockopt implementation. From what I'm seeing, I believe that it would be easier to > copies to a ptr as the syscall. > The length of the output is usually 4 bytes (sometimes less) but in a lot of cases, this length is > variable. Sometime it can even be bigger that the SQE128 ring. > > Here is a non-exhaustive list of those cases : > /net/ipv4/tcp.c : int do_tcp_getsockopt(...) > - TCP_INFO : up to 240 bytes > - TCP_CC_INFO and TCP_REPAIR_WINDOW : up to 20 bytes > - TCP_CONGESTION and TCP_ULP : up to 16 bytes > - TCP_ZEROCPOY_RECEIVE : up to 64 bytes > /net/atm/commun.c : int vcc_getsockopt(...) > - SO_ATMQOS : up to 88 bytes > - SO_ATMPVC : up to 16 bytes > /net/ipv4/io_sockglue.c : int do_ip_getsockopt(...) > - MCAST_MSFILTER : up to 144 bytes > - IP_MSFILTER : 16 bytes minimum > > I will look into setsockopt but I believe it might be the same. > If needed I can also complete this list. > However there are some cases where it is hard to determinate a maximum amount of bytes in advance. Also look at SCTP - it has some very long buffers. Almost any code that uses SCTP needs to use the SCTP_STATUS request to get the negotiated number of data streams (that one is relatively short). IIRC there are also getsockopt() that are read/modify/write! There will also be user code that supplies a very long buffer (too long to allocate in kernel) for some variable length requests. So the generic system call code can allocate a short (eg on-stack) buffer for short requests and then pass both the user and kernel addresses (and lengths) through to the protocol functions. Anything that needs a big buffer can directly copy to/from and user buffers, kernel callers would need to pass a big enough buffer. But the code for small buffers would be much simplified for both kernel and user access. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
© 2016 - 2023 Red Hat, Inc.