Now that we've generalized the infrastructure for user.* xattrs, make it
possible to set up to 128 user.* extended attributes, or up to 128 KiB in
total, on a sockfs inode. kernfs (cgroupfs) has the same limits and they
have proven to be quite sufficient for nearly all use-cases.
This will allow containers to label sockets and will, e.g., be used by
systemd and GNOME to find various sockets in containers where
high-privilege or more complicated solutions aren't available.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
net/socket.c | 119 +++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 92 insertions(+), 27 deletions(-)
diff --git a/net/socket.c b/net/socket.c
index 136b98c54fb3..7aa94fce7a8b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -315,45 +315,70 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
static struct kmem_cache *sock_inode_cachep __ro_after_init;
+struct sockfs_inode {
+ struct simple_xattrs *xattrs;
+ struct simple_xattr_limits xattr_limits;
+ struct socket_alloc;
+};
+
+static struct sockfs_inode *SOCKFS_I(struct inode *inode)
+{
+ return container_of(inode, struct sockfs_inode, vfs_inode);
+}
+
static struct inode *sock_alloc_inode(struct super_block *sb)
{
- struct socket_alloc *ei;
+ struct sockfs_inode *si;
- ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
- if (!ei)
+ si = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
+ if (!si)
return NULL;
- init_waitqueue_head(&ei->socket.wq.wait);
- ei->socket.wq.fasync_list = NULL;
- ei->socket.wq.flags = 0;
+ si->xattrs = NULL;
+ simple_xattr_limits_init(&si->xattr_limits);
+
+ init_waitqueue_head(&si->socket.wq.wait);
+ si->socket.wq.fasync_list = NULL;
+ si->socket.wq.flags = 0;
+
+ si->socket.state = SS_UNCONNECTED;
+ si->socket.flags = 0;
+ si->socket.ops = NULL;
+ si->socket.sk = NULL;
+ si->socket.file = NULL;
- ei->socket.state = SS_UNCONNECTED;
- ei->socket.flags = 0;
- ei->socket.ops = NULL;
- ei->socket.sk = NULL;
- ei->socket.file = NULL;
+ return &si->vfs_inode;
+}
+
+static void sock_evict_inode(struct inode *inode)
+{
+ struct sockfs_inode *si = SOCKFS_I(inode);
+ struct simple_xattrs *xattrs = si->xattrs;
- return &ei->vfs_inode;
+ if (xattrs) {
+ simple_xattrs_free(xattrs, NULL);
+ kfree(xattrs);
+ }
+ clear_inode(inode);
}
static void sock_free_inode(struct inode *inode)
{
- struct socket_alloc *ei;
+ struct sockfs_inode *si = SOCKFS_I(inode);
- ei = container_of(inode, struct socket_alloc, vfs_inode);
- kmem_cache_free(sock_inode_cachep, ei);
+ kmem_cache_free(sock_inode_cachep, si);
}
static void init_once(void *foo)
{
- struct socket_alloc *ei = (struct socket_alloc *)foo;
+ struct sockfs_inode *si = (struct sockfs_inode *)foo;
- inode_init_once(&ei->vfs_inode);
+ inode_init_once(&si->vfs_inode);
}
static void init_inodecache(void)
{
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
- sizeof(struct socket_alloc),
+ sizeof(struct sockfs_inode),
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
@@ -365,6 +390,7 @@ static void init_inodecache(void)
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.free_inode = sock_free_inode,
+ .evict_inode = sock_evict_inode,
.statfs = simple_statfs,
};
@@ -417,9 +443,48 @@ static const struct xattr_handler sockfs_security_xattr_handler = {
.set = sockfs_security_xattr_set,
};
+static int sockfs_user_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, void *value, size_t size)
+{
+ const char *name = xattr_full_name(handler, suffix);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(SOCKFS_I(inode)->xattrs);
+ if (!xattrs)
+ return -ENODATA;
+
+ return simple_xattr_get(xattrs, name, value, size);
+}
+
+static int sockfs_user_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, const void *value,
+ size_t size, int flags)
+{
+ const char *name = xattr_full_name(handler, suffix);
+ struct sockfs_inode *si = SOCKFS_I(inode);
+ struct simple_xattrs *xattrs;
+
+ xattrs = simple_xattrs_lazy_alloc(&si->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
+ return simple_xattr_set_limited(xattrs, &si->xattr_limits,
+ name, value, size, flags);
+}
+
+static const struct xattr_handler sockfs_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = sockfs_user_xattr_get,
+ .set = sockfs_user_xattr_set,
+};
+
static const struct xattr_handler * const sockfs_xattr_handlers[] = {
&sockfs_xattr_handler,
&sockfs_security_xattr_handler,
+ &sockfs_user_xattr_handler,
NULL
};
@@ -572,26 +637,26 @@ EXPORT_SYMBOL(sockfd_lookup);
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
- ssize_t len;
- ssize_t used = 0;
+ struct sockfs_inode *si = SOCKFS_I(d_inode(dentry));
+ ssize_t len, used;
- len = security_inode_listsecurity(d_inode(dentry), buffer, size);
+ len = simple_xattr_list(d_inode(dentry), READ_ONCE(si->xattrs),
+ buffer, size);
if (len < 0)
return len;
- used += len;
+
+ used = len;
if (buffer) {
- if (size < used)
- return -ERANGE;
buffer += len;
+ size -= len;
}
- len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
+ len = XATTR_NAME_SOCKPROTONAME_LEN + 1;
used += len;
if (buffer) {
- if (size < used)
+ if (size < len)
return -ERANGE;
memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
- buffer += len;
}
return used;
--
2.47.3
On Mon 16-02-26 14:32:06, Christian Brauner wrote:
> Now that we've generalized the infrastructure for user.* xattrs make it
> possible to set up to 128 user.* extended attributes on a sockfs inode
> or up to 128kib. kernfs (cgroupfs) has the same limits and it has proven
> to be quite sufficient for nearly all use-cases.
>
> This will allow containers to label sockets and will e.g., be used by
> systemd and Gnome to find various sockets in containers where
> high-privilege or more complicated solutions aren't available.
>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> net/socket.c | 119 +++++++++++++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 92 insertions(+), 27 deletions(-)
>
> diff --git a/net/socket.c b/net/socket.c
> index 136b98c54fb3..7aa94fce7a8b 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -315,45 +315,70 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
>
> static struct kmem_cache *sock_inode_cachep __ro_after_init;
>
> +struct sockfs_inode {
> + struct simple_xattrs *xattrs;
> + struct simple_xattr_limits xattr_limits;
> + struct socket_alloc;
> +};
> +
> +static struct sockfs_inode *SOCKFS_I(struct inode *inode)
> +{
> + return container_of(inode, struct sockfs_inode, vfs_inode);
> +}
> +
> static struct inode *sock_alloc_inode(struct super_block *sb)
> {
> - struct socket_alloc *ei;
> + struct sockfs_inode *si;
>
> - ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
> - if (!ei)
> + si = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
> + if (!si)
> return NULL;
> - init_waitqueue_head(&ei->socket.wq.wait);
> - ei->socket.wq.fasync_list = NULL;
> - ei->socket.wq.flags = 0;
> + si->xattrs = NULL;
> + simple_xattr_limits_init(&si->xattr_limits);
> +
> + init_waitqueue_head(&si->socket.wq.wait);
> + si->socket.wq.fasync_list = NULL;
> + si->socket.wq.flags = 0;
> +
> + si->socket.state = SS_UNCONNECTED;
> + si->socket.flags = 0;
> + si->socket.ops = NULL;
> + si->socket.sk = NULL;
> + si->socket.file = NULL;
>
> - ei->socket.state = SS_UNCONNECTED;
> - ei->socket.flags = 0;
> - ei->socket.ops = NULL;
> - ei->socket.sk = NULL;
> - ei->socket.file = NULL;
> + return &si->vfs_inode;
> +}
> +
> +static void sock_evict_inode(struct inode *inode)
> +{
> + struct sockfs_inode *si = SOCKFS_I(inode);
> + struct simple_xattrs *xattrs = si->xattrs;
>
> - return &ei->vfs_inode;
> + if (xattrs) {
> + simple_xattrs_free(xattrs, NULL);
> + kfree(xattrs);
> + }
> + clear_inode(inode);
> }
>
> static void sock_free_inode(struct inode *inode)
> {
> - struct socket_alloc *ei;
> + struct sockfs_inode *si = SOCKFS_I(inode);
>
> - ei = container_of(inode, struct socket_alloc, vfs_inode);
> - kmem_cache_free(sock_inode_cachep, ei);
> + kmem_cache_free(sock_inode_cachep, si);
> }
>
> static void init_once(void *foo)
> {
> - struct socket_alloc *ei = (struct socket_alloc *)foo;
> + struct sockfs_inode *si = (struct sockfs_inode *)foo;
>
> - inode_init_once(&ei->vfs_inode);
> + inode_init_once(&si->vfs_inode);
> }
>
> static void init_inodecache(void)
> {
> sock_inode_cachep = kmem_cache_create("sock_inode_cache",
> - sizeof(struct socket_alloc),
> + sizeof(struct sockfs_inode),
> 0,
> (SLAB_HWCACHE_ALIGN |
> SLAB_RECLAIM_ACCOUNT |
> @@ -365,6 +390,7 @@ static void init_inodecache(void)
> static const struct super_operations sockfs_ops = {
> .alloc_inode = sock_alloc_inode,
> .free_inode = sock_free_inode,
> + .evict_inode = sock_evict_inode,
> .statfs = simple_statfs,
> };
>
> @@ -417,9 +443,48 @@ static const struct xattr_handler sockfs_security_xattr_handler = {
> .set = sockfs_security_xattr_set,
> };
>
> +static int sockfs_user_xattr_get(const struct xattr_handler *handler,
> + struct dentry *dentry, struct inode *inode,
> + const char *suffix, void *value, size_t size)
> +{
> + const char *name = xattr_full_name(handler, suffix);
> + struct simple_xattrs *xattrs;
> +
> + xattrs = READ_ONCE(SOCKFS_I(inode)->xattrs);
> + if (!xattrs)
> + return -ENODATA;
> +
> + return simple_xattr_get(xattrs, name, value, size);
> +}
> +
> +static int sockfs_user_xattr_set(const struct xattr_handler *handler,
> + struct mnt_idmap *idmap,
> + struct dentry *dentry, struct inode *inode,
> + const char *suffix, const void *value,
> + size_t size, int flags)
> +{
> + const char *name = xattr_full_name(handler, suffix);
> + struct sockfs_inode *si = SOCKFS_I(inode);
> + struct simple_xattrs *xattrs;
> +
> + xattrs = simple_xattrs_lazy_alloc(&si->xattrs, value, flags);
> + if (IS_ERR_OR_NULL(xattrs))
> + return PTR_ERR(xattrs);
> +
> + return simple_xattr_set_limited(xattrs, &si->xattr_limits,
> + name, value, size, flags);
> +}
> +
> +static const struct xattr_handler sockfs_user_xattr_handler = {
> + .prefix = XATTR_USER_PREFIX,
> + .get = sockfs_user_xattr_get,
> + .set = sockfs_user_xattr_set,
> +};
> +
> static const struct xattr_handler * const sockfs_xattr_handlers[] = {
> &sockfs_xattr_handler,
> &sockfs_security_xattr_handler,
> + &sockfs_user_xattr_handler,
> NULL
> };
>
> @@ -572,26 +637,26 @@ EXPORT_SYMBOL(sockfd_lookup);
> static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
> size_t size)
> {
> - ssize_t len;
> - ssize_t used = 0;
> + struct sockfs_inode *si = SOCKFS_I(d_inode(dentry));
> + ssize_t len, used;
>
> - len = security_inode_listsecurity(d_inode(dentry), buffer, size);
> + len = simple_xattr_list(d_inode(dentry), READ_ONCE(si->xattrs),
> + buffer, size);
> if (len < 0)
> return len;
> - used += len;
> +
> + used = len;
> if (buffer) {
> - if (size < used)
> - return -ERANGE;
> buffer += len;
> + size -= len;
> }
>
> - len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
> + len = XATTR_NAME_SOCKPROTONAME_LEN + 1;
> used += len;
> if (buffer) {
> - if (size < used)
> + if (size < len)
> return -ERANGE;
> memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
> - buffer += len;
> }
>
> return used;
>
> --
> 2.47.3
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
© 2016 - 2026 Red Hat, Inc.