fs/namei.c | 15 +++++++++++++-- include/linux/cred.h | 1 + include/uapi/linux/prctl.h | 4 ++++ kernel/groups.c | 23 ++++++++++++++++++----- kernel/sys.c | 18 ++++++++++++++++++ 5 files changed, 54 insertions(+), 7 deletions(-)
This patch adds the group restriction bitmap.
This bitmap is normally 0 (all bits clear), which means the normal
handling of the group permission check. When any bit is set, the
corresponding entry in the supplementary group list is treated differently:
- if group access is denied, then deny, as before
- if group access is allowed, then proceed to checking the Other perms.
Added 3 prctl calls: PR_GET_GRBITMAP, PR_SET_GRBITMAP and PR_CLR_GRBITMAP
to manipulate the bitmap. This implementation only allows manipulating
31 bits. PR_CLR_GRBITMAP needs CAP_SETGID, meaning that the user can
only set the restriction bits but never clear them (unless capable).
Q: Why is this needed?
A: When you want to lower the privs of your process, you may use
suid/sgid bits to switch to some home-less (no home dir) unprivileged
user that can't touch any files of the original user. But the
supplementary group list ruins that possibility, and you can't drop it.
The ability to drop the group list was proposed by Josh Triplett:
https://lore.kernel.org/all/0895c1f268bc0b01cc6c8ed4607d7c3953f49728.1416041823.git.josh@joshtriplett.org/
But it wasn't considered secure enough because a group may restrict
access, not only allow it. My solution avoids that problem: when you
set a bit in the restriction bitmap, the group restriction still
applies - only the permission is withdrawn. Another advantage is that
you can selectively restrict groups from the list, rather than dropping
them all at once.
Changes in v2: add PR_CLR_GRBITMAP and make the bits otherwise unclearable.
Signed-off-by: Stas Sergeev <stsp2@yandex.ru>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: Christian Brauner <brauner@kernel.org>
CC: Jan Kara <jack@suse.cz>
CC: Jens Axboe <axboe@kernel.dk>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Florent Revest <revest@chromium.org>
CC: Kees Cook <kees@kernel.org>
CC: Palmer Dabbelt <palmer@rivosinc.com>
CC: Charlie Jenkins <charlie@rivosinc.com>
CC: Benjamin Gray <bgray@linux.ibm.com>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Helge Deller <deller@gmx.de>
CC: Zev Weiss <zev@bewilderbeest.net> (commit_signer:1/12=8%)
CC: Samuel Holland <samuel.holland@sifive.com>
CC: linux-fsdevel@vger.kernel.org
CC: linux-kernel@vger.kernel.org
CC: Eric Biederman <ebiederm@xmission.com>
CC: Andy Lutomirski <luto@kernel.org>
CC: Josh Triplett <josh@joshtriplett.org>
---
fs/namei.c | 15 +++++++++++++--
include/linux/cred.h | 1 +
include/uapi/linux/prctl.h | 4 ++++
kernel/groups.c | 23 ++++++++++++++++++-----
kernel/sys.c | 18 ++++++++++++++++++
5 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 4a4a22a08ac2..44f5571d8f2c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -373,8 +373,19 @@ static int acl_permission_check(struct mnt_idmap *idmap,
*/
if (mask & (mode ^ (mode >> 3))) {
vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
- if (vfsgid_in_group_p(vfsgid))
- mode >>= 3;
+ int idx = vfsgid_in_group_p(vfsgid);
+
+ if (idx) {
+ unsigned int mode_grp = mode >> 3;
+
+ if (mask & ~mode_grp)
+ return -EACCES;
+ idx -= 2;
+ if (idx < 0 || idx >= 32 || !((1U << idx) &
+ current_cred()->group_info->restrict_bitmap))
+ return 0;
+ /* If we hit restrict_bitmap, then check Others. */
+ }
}
/* Bits in 'mode' clear that we require? */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 2976f534a7a3..0639fa154654 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -25,6 +25,7 @@ struct inode;
*/
struct group_info {
refcount_t usage;
+ unsigned int restrict_bitmap;
int ngroups;
kgid_t gid[];
} __randomize_layout;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 35791791a879..2a9f3e0c9845 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,8 @@ struct prctl_mm_map {
# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+#define PR_GET_GRBITMAP 74
+#define PR_SET_GRBITMAP 75
+#define PR_CLR_GRBITMAP 76
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/groups.c b/kernel/groups.c
index 9b43da22647d..b7dfd96826e5 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -20,6 +20,7 @@ struct group_info *groups_alloc(int gidsetsize)
return NULL;
refcount_set(&gi->usage, 1);
+ gi->restrict_bitmap = 0;
gi->ngroups = gidsetsize;
return gi;
}
@@ -88,7 +89,9 @@ void groups_sort(struct group_info *group_info)
}
EXPORT_SYMBOL(groups_sort);
-/* a simple bsearch */
+/* a simple bsearch
+ * Return: 1-based index of the matched entry, or 0 if not found.
+ */
int groups_search(const struct group_info *group_info, kgid_t grp)
{
unsigned int left, right;
@@ -105,7 +108,7 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
else if (gid_lt(grp, group_info->gid[mid]))
right = mid;
else
- return 1;
+ return mid + 1;
}
return 0;
}
@@ -222,15 +225,21 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
}
/*
- * Check whether we're fsgid/egid or in the supplemental group..
+ * Check whether we're fsgid/egid or in the supplemental group.
+ * Return: 0 if not a member, 1 if grp matches fsgid, otherwise the
+ * matched entry's 0-based index in group_info plus 2.
*/
int in_group_p(kgid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
- if (!gid_eq(grp, cred->fsgid))
+ if (!gid_eq(grp, cred->fsgid)) {
retval = groups_search(cred->group_info, grp);
+ /* Make it start from 2. */
+ if (retval)
+ retval++;
+ }
return retval;
}
@@ -241,8 +250,12 @@ int in_egroup_p(kgid_t grp)
const struct cred *cred = current_cred();
int retval = 1;
- if (!gid_eq(grp, cred->egid))
+ if (!gid_eq(grp, cred->egid)) {
retval = groups_search(cred->group_info, grp);
+ /* Make it start from 2. */
+ if (retval)
+ retval++;
+ }
return retval;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 4da31f28fda8..ed12ac6f5a8a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2784,6 +2784,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_RISCV_SET_ICACHE_FLUSH_CTX:
error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
break;
+ case PR_GET_GRBITMAP:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = current_cred()->group_info->restrict_bitmap;
+ break;
+ case PR_SET_GRBITMAP:
+ /* Allow 31 bits to avoid setting sign bit. */
+ if (arg2 > (1U << 31) - 1 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ current_cred()->group_info->restrict_bitmap |= arg2;
+ break;
+ case PR_CLR_GRBITMAP:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ if (!may_setgroups())
+ return -EPERM;
+ current_cred()->group_info->restrict_bitmap = 0;
+ break;
default:
error = -EINVAL;
break;
--
2.46.2
Hi Stas, kernel test robot noticed the following build errors: [auto build test ERROR on brauner-vfs/vfs.all] [also build test ERROR on linus/master v6.12-rc1 next-20240930] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Stas-Sergeev/add-group-restriction-bitmap/20240930-144632 base: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all patch link: https://lore.kernel.org/r/20240930063405.113227-1-stsp2%40yandex.ru patch subject: [PATCH v2] add group restriction bitmap config: i386-buildonly-randconfig-001-20241001 (https://download.01.org/0day-ci/archive/20241001/202410010306.DiK2TZWL-lkp@intel.com/config) compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241001/202410010306.DiK2TZWL-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410010306.DiK2TZWL-lkp@intel.com/ All errors (new ones prefixed by >>): >> kernel/sys.c:2799:8: error: call to undeclared function 'may_setgroups'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 2799 | if (!may_setgroups()) | ^ 1 error generated. 
vim +/may_setgroups +2799 kernel/sys.c 2456 2457 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 2458 unsigned long, arg4, unsigned long, arg5) 2459 { 2460 struct task_struct *me = current; 2461 unsigned char comm[sizeof(me->comm)]; 2462 long error; 2463 2464 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 2465 if (error != -ENOSYS) 2466 return error; 2467 2468 error = 0; 2469 switch (option) { 2470 case PR_SET_PDEATHSIG: 2471 if (!valid_signal(arg2)) { 2472 error = -EINVAL; 2473 break; 2474 } 2475 me->pdeath_signal = arg2; 2476 break; 2477 case PR_GET_PDEATHSIG: 2478 error = put_user(me->pdeath_signal, (int __user *)arg2); 2479 break; 2480 case PR_GET_DUMPABLE: 2481 error = get_dumpable(me->mm); 2482 break; 2483 case PR_SET_DUMPABLE: 2484 if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) { 2485 error = -EINVAL; 2486 break; 2487 } 2488 set_dumpable(me->mm, arg2); 2489 break; 2490 2491 case PR_SET_UNALIGN: 2492 error = SET_UNALIGN_CTL(me, arg2); 2493 break; 2494 case PR_GET_UNALIGN: 2495 error = GET_UNALIGN_CTL(me, arg2); 2496 break; 2497 case PR_SET_FPEMU: 2498 error = SET_FPEMU_CTL(me, arg2); 2499 break; 2500 case PR_GET_FPEMU: 2501 error = GET_FPEMU_CTL(me, arg2); 2502 break; 2503 case PR_SET_FPEXC: 2504 error = SET_FPEXC_CTL(me, arg2); 2505 break; 2506 case PR_GET_FPEXC: 2507 error = GET_FPEXC_CTL(me, arg2); 2508 break; 2509 case PR_GET_TIMING: 2510 error = PR_TIMING_STATISTICAL; 2511 break; 2512 case PR_SET_TIMING: 2513 if (arg2 != PR_TIMING_STATISTICAL) 2514 error = -EINVAL; 2515 break; 2516 case PR_SET_NAME: 2517 comm[sizeof(me->comm) - 1] = 0; 2518 if (strncpy_from_user(comm, (char __user *)arg2, 2519 sizeof(me->comm) - 1) < 0) 2520 return -EFAULT; 2521 set_task_comm(me, comm); 2522 proc_comm_connector(me); 2523 break; 2524 case PR_GET_NAME: 2525 get_task_comm(comm, me); 2526 if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) 2527 return -EFAULT; 2528 break; 2529 case PR_GET_ENDIAN: 2530 error = 
GET_ENDIAN(me, arg2); 2531 break; 2532 case PR_SET_ENDIAN: 2533 error = SET_ENDIAN(me, arg2); 2534 break; 2535 case PR_GET_SECCOMP: 2536 error = prctl_get_seccomp(); 2537 break; 2538 case PR_SET_SECCOMP: 2539 error = prctl_set_seccomp(arg2, (char __user *)arg3); 2540 break; 2541 case PR_GET_TSC: 2542 error = GET_TSC_CTL(arg2); 2543 break; 2544 case PR_SET_TSC: 2545 error = SET_TSC_CTL(arg2); 2546 break; 2547 case PR_TASK_PERF_EVENTS_DISABLE: 2548 error = perf_event_task_disable(); 2549 break; 2550 case PR_TASK_PERF_EVENTS_ENABLE: 2551 error = perf_event_task_enable(); 2552 break; 2553 case PR_GET_TIMERSLACK: 2554 if (current->timer_slack_ns > ULONG_MAX) 2555 error = ULONG_MAX; 2556 else 2557 error = current->timer_slack_ns; 2558 break; 2559 case PR_SET_TIMERSLACK: 2560 if (arg2 <= 0) 2561 current->timer_slack_ns = 2562 current->default_timer_slack_ns; 2563 else 2564 current->timer_slack_ns = arg2; 2565 break; 2566 case PR_MCE_KILL: 2567 if (arg4 | arg5) 2568 return -EINVAL; 2569 switch (arg2) { 2570 case PR_MCE_KILL_CLEAR: 2571 if (arg3 != 0) 2572 return -EINVAL; 2573 current->flags &= ~PF_MCE_PROCESS; 2574 break; 2575 case PR_MCE_KILL_SET: 2576 current->flags |= PF_MCE_PROCESS; 2577 if (arg3 == PR_MCE_KILL_EARLY) 2578 current->flags |= PF_MCE_EARLY; 2579 else if (arg3 == PR_MCE_KILL_LATE) 2580 current->flags &= ~PF_MCE_EARLY; 2581 else if (arg3 == PR_MCE_KILL_DEFAULT) 2582 current->flags &= 2583 ~(PF_MCE_EARLY|PF_MCE_PROCESS); 2584 else 2585 return -EINVAL; 2586 break; 2587 default: 2588 return -EINVAL; 2589 } 2590 break; 2591 case PR_MCE_KILL_GET: 2592 if (arg2 | arg3 | arg4 | arg5) 2593 return -EINVAL; 2594 if (current->flags & PF_MCE_PROCESS) 2595 error = (current->flags & PF_MCE_EARLY) ? 
2596 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; 2597 else 2598 error = PR_MCE_KILL_DEFAULT; 2599 break; 2600 case PR_SET_MM: 2601 error = prctl_set_mm(arg2, arg3, arg4, arg5); 2602 break; 2603 case PR_GET_TID_ADDRESS: 2604 error = prctl_get_tid_address(me, (int __user * __user *)arg2); 2605 break; 2606 case PR_SET_CHILD_SUBREAPER: 2607 me->signal->is_child_subreaper = !!arg2; 2608 if (!arg2) 2609 break; 2610 2611 walk_process_tree(me, propagate_has_child_subreaper, NULL); 2612 break; 2613 case PR_GET_CHILD_SUBREAPER: 2614 error = put_user(me->signal->is_child_subreaper, 2615 (int __user *)arg2); 2616 break; 2617 case PR_SET_NO_NEW_PRIVS: 2618 if (arg2 != 1 || arg3 || arg4 || arg5) 2619 return -EINVAL; 2620 2621 task_set_no_new_privs(current); 2622 break; 2623 case PR_GET_NO_NEW_PRIVS: 2624 if (arg2 || arg3 || arg4 || arg5) 2625 return -EINVAL; 2626 return task_no_new_privs(current) ? 1 : 0; 2627 case PR_GET_THP_DISABLE: 2628 if (arg2 || arg3 || arg4 || arg5) 2629 return -EINVAL; 2630 error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags); 2631 break; 2632 case PR_SET_THP_DISABLE: 2633 if (arg3 || arg4 || arg5) 2634 return -EINVAL; 2635 if (mmap_write_lock_killable(me->mm)) 2636 return -EINTR; 2637 if (arg2) 2638 set_bit(MMF_DISABLE_THP, &me->mm->flags); 2639 else 2640 clear_bit(MMF_DISABLE_THP, &me->mm->flags); 2641 mmap_write_unlock(me->mm); 2642 break; 2643 case PR_MPX_ENABLE_MANAGEMENT: 2644 case PR_MPX_DISABLE_MANAGEMENT: 2645 /* No longer implemented: */ 2646 return -EINVAL; 2647 case PR_SET_FP_MODE: 2648 error = SET_FP_MODE(me, arg2); 2649 break; 2650 case PR_GET_FP_MODE: 2651 error = GET_FP_MODE(me); 2652 break; 2653 case PR_SVE_SET_VL: 2654 error = SVE_SET_VL(arg2); 2655 break; 2656 case PR_SVE_GET_VL: 2657 error = SVE_GET_VL(); 2658 break; 2659 case PR_SME_SET_VL: 2660 error = SME_SET_VL(arg2); 2661 break; 2662 case PR_SME_GET_VL: 2663 error = SME_GET_VL(); 2664 break; 2665 case PR_GET_SPECULATION_CTRL: 2666 if (arg3 || arg4 || arg5) 2667 return -EINVAL; 2668 
error = arch_prctl_spec_ctrl_get(me, arg2); 2669 break; 2670 case PR_SET_SPECULATION_CTRL: 2671 if (arg4 || arg5) 2672 return -EINVAL; 2673 error = arch_prctl_spec_ctrl_set(me, arg2, arg3); 2674 break; 2675 case PR_PAC_RESET_KEYS: 2676 if (arg3 || arg4 || arg5) 2677 return -EINVAL; 2678 error = PAC_RESET_KEYS(me, arg2); 2679 break; 2680 case PR_PAC_SET_ENABLED_KEYS: 2681 if (arg4 || arg5) 2682 return -EINVAL; 2683 error = PAC_SET_ENABLED_KEYS(me, arg2, arg3); 2684 break; 2685 case PR_PAC_GET_ENABLED_KEYS: 2686 if (arg2 || arg3 || arg4 || arg5) 2687 return -EINVAL; 2688 error = PAC_GET_ENABLED_KEYS(me); 2689 break; 2690 case PR_SET_TAGGED_ADDR_CTRL: 2691 if (arg3 || arg4 || arg5) 2692 return -EINVAL; 2693 error = SET_TAGGED_ADDR_CTRL(arg2); 2694 break; 2695 case PR_GET_TAGGED_ADDR_CTRL: 2696 if (arg2 || arg3 || arg4 || arg5) 2697 return -EINVAL; 2698 error = GET_TAGGED_ADDR_CTRL(); 2699 break; 2700 case PR_SET_IO_FLUSHER: 2701 if (!capable(CAP_SYS_RESOURCE)) 2702 return -EPERM; 2703 2704 if (arg3 || arg4 || arg5) 2705 return -EINVAL; 2706 2707 if (arg2 == 1) 2708 current->flags |= PR_IO_FLUSHER; 2709 else if (!arg2) 2710 current->flags &= ~PR_IO_FLUSHER; 2711 else 2712 return -EINVAL; 2713 break; 2714 case PR_GET_IO_FLUSHER: 2715 if (!capable(CAP_SYS_RESOURCE)) 2716 return -EPERM; 2717 2718 if (arg2 || arg3 || arg4 || arg5) 2719 return -EINVAL; 2720 2721 error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; 2722 break; 2723 case PR_SET_SYSCALL_USER_DISPATCH: 2724 error = set_syscall_user_dispatch(arg2, arg3, arg4, 2725 (char __user *) arg5); 2726 break; 2727 #ifdef CONFIG_SCHED_CORE 2728 case PR_SCHED_CORE: 2729 error = sched_core_share_pid(arg2, arg3, arg4, arg5); 2730 break; 2731 #endif 2732 case PR_SET_MDWE: 2733 error = prctl_set_mdwe(arg2, arg3, arg4, arg5); 2734 break; 2735 case PR_GET_MDWE: 2736 error = prctl_get_mdwe(arg2, arg3, arg4, arg5); 2737 break; 2738 case PR_PPC_GET_DEXCR: 2739 if (arg3 || arg4 || arg5) 2740 return -EINVAL; 2741 error = 
PPC_GET_DEXCR_ASPECT(me, arg2); 2742 break; 2743 case PR_PPC_SET_DEXCR: 2744 if (arg4 || arg5) 2745 return -EINVAL; 2746 error = PPC_SET_DEXCR_ASPECT(me, arg2, arg3); 2747 break; 2748 case PR_SET_VMA: 2749 error = prctl_set_vma(arg2, arg3, arg4, arg5); 2750 break; 2751 case PR_GET_AUXV: 2752 if (arg4 || arg5) 2753 return -EINVAL; 2754 error = prctl_get_auxv((void __user *)arg2, arg3); 2755 break; 2756 #ifdef CONFIG_KSM 2757 case PR_SET_MEMORY_MERGE: 2758 if (arg3 || arg4 || arg5) 2759 return -EINVAL; 2760 if (mmap_write_lock_killable(me->mm)) 2761 return -EINTR; 2762 2763 if (arg2) 2764 error = ksm_enable_merge_any(me->mm); 2765 else 2766 error = ksm_disable_merge_any(me->mm); 2767 mmap_write_unlock(me->mm); 2768 break; 2769 case PR_GET_MEMORY_MERGE: 2770 if (arg2 || arg3 || arg4 || arg5) 2771 return -EINVAL; 2772 2773 error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags); 2774 break; 2775 #endif 2776 case PR_RISCV_V_SET_CONTROL: 2777 error = RISCV_V_SET_CONTROL(arg2); 2778 break; 2779 case PR_RISCV_V_GET_CONTROL: 2780 error = RISCV_V_GET_CONTROL(); 2781 break; 2782 case PR_RISCV_SET_ICACHE_FLUSH_CTX: 2783 error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); 2784 break; 2785 case PR_GET_GRBITMAP: 2786 if (arg2 || arg3 || arg4 || arg5) 2787 return -EINVAL; 2788 error = current_cred()->group_info->restrict_bitmap; 2789 break; 2790 case PR_SET_GRBITMAP: 2791 /* Allow 31 bits to avoid setting sign bit. */ 2792 if (arg2 > (1U << 31) - 1 || arg3 || arg4 || arg5) 2793 return -EINVAL; 2794 current_cred()->group_info->restrict_bitmap |= arg2; 2795 break; 2796 case PR_CLR_GRBITMAP: 2797 if (arg2 || arg3 || arg4 || arg5) 2798 return -EINVAL; > 2799 if (!may_setgroups()) 2800 return -EPERM; 2801 current_cred()->group_info->restrict_bitmap = 0; 2802 break; 2803 default: 2804 error = -EINVAL; 2805 break; 2806 } 2807 return error; 2808 } 2809 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2024 Red Hat, Inc.