[PATCH v5] sysctl: simplify the min/max boundary check

Wen Yang posted 1 patch 11 months, 2 weeks ago
kernel/sysctl.c | 211 ++++++++++++++++++++----------------------------
1 file changed, 86 insertions(+), 125 deletions(-)
[PATCH v5] sysctl: simplify the min/max boundary check
Posted by Wen Yang 11 months, 2 weeks ago
do_proc_dointvec_conv() used the default range of int type, while
do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
struct do_proc_dointvec_minmax_conv_param, which are actually passed
in table->extra{1,2} pointers.

By changing the struct do_proc_dointvec_minmax_conv_param's
"int * {min, max}" to "int {min, max}" and setting the min/max value
via dereference the table->extra{1, 2}, those do_proc_dointvec_conv() and
do_proc_dointvec_minmax_conv() functions can be merged into one and reduce
code by one-third.

Similar changes were also made for do_proc_douintvec_minmax_conv_param,
do_proc_douintvec_conv() and do_proc_douintvec_minmax_conv().

Selftest were also made:

[23:16:38] ================ sysctl_test (11 subtests) =================
[23:16:38] [PASSED] sysctl_test_api_dointvec_null_tbl_data
[23:16:38] [PASSED] sysctl_test_api_dointvec_table_maxlen_unset
[23:16:38] [PASSED] sysctl_test_api_dointvec_table_len_is_zero
[23:16:38] [PASSED] sysctl_test_api_dointvec_table_read_but_position_set
[23:16:38] [PASSED] sysctl_test_dointvec_read_happy_single_positive
[23:16:38] [PASSED] sysctl_test_dointvec_read_happy_single_negative
[23:16:38] [PASSED] sysctl_test_dointvec_write_happy_single_positive
[23:16:38] [PASSED] sysctl_test_dointvec_write_happy_single_negative
[23:16:38] [PASSED] sysctl_test_api_dointvec_write_single_less_int_min
[23:16:38] [PASSED] sysctl_test_api_dointvec_write_single_greater_int_max
[23:16:38] [PASSED] sysctl_test_register_sysctl_sz_invalid_extra_value
[23:16:38] =================== [PASSED] sysctl_test ===================
[23:16:38] ============================================================
[23:16:38] Testing complete. Ran 11 tests: passed: 11

Signed-off-by: Wen Yang <wen.yang@linux.dev>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: linux-kernel@vger.kernel.org
---
 kernel/sysctl.c | 211 ++++++++++++++++++++----------------------------
 1 file changed, 86 insertions(+), 125 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7ae7a4136855..250dc9057259 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -424,22 +424,44 @@ static void proc_put_char(void **buf, size_t *size, char c)
 	}
 }
 
-static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
-				 int *valp,
-				 int write, void *data)
+/**
+ * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
+ * @min: the minimum allowable value
+ * @max: the maximum allowable value
+ *
+ * The do_proc_dointvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_dointvec_minmax(), proc_dou8vec_minmax() and so on.
+ */
+struct do_proc_dointvec_minmax_conv_param {
+	int min;
+	int max;
+};
+
+static inline int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
+		int *valp,
+		int write, void *data)
 {
+	struct do_proc_dointvec_minmax_conv_param *param = data;
+	long min, max;
+	long val;
+
 	if (write) {
 		if (*negp) {
-			if (*lvalp > (unsigned long) INT_MAX + 1)
-				return -EINVAL;
-			WRITE_ONCE(*valp, -*lvalp);
+			max = param ? param->max : 0;
+			min = param ? param->min : INT_MIN;
+			val = -*lvalp;
 		} else {
-			if (*lvalp > (unsigned long) INT_MAX)
-				return -EINVAL;
-			WRITE_ONCE(*valp, *lvalp);
+			max = param ? param->max : INT_MAX;
+			min = param ? param->min : 0;
+			val = *lvalp;
 		}
+
+		if (val > max || val < min)
+			return -EINVAL;
+		WRITE_ONCE(*valp, (int)val);
 	} else {
-		int val = READ_ONCE(*valp);
+		val = (int)READ_ONCE(*valp);
 		if (val < 0) {
 			*negp = true;
 			*lvalp = -(unsigned long)val;
@@ -448,21 +470,44 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 			*lvalp = (unsigned long)val;
 		}
 	}
+
 	return 0;
 }
 
-static int do_proc_douintvec_conv(unsigned long *lvalp,
-				  unsigned int *valp,
-				  int write, void *data)
+/**
+ * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * The do_proc_douintvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_douintvec_minmax() handler.
+ */
+struct do_proc_douintvec_minmax_conv_param {
+	unsigned int min;
+	unsigned int max;
+};
+
+static inline int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data)
 {
+	struct do_proc_douintvec_minmax_conv_param *param = data;
+	unsigned long min, max;
+
 	if (write) {
-		if (*lvalp > UINT_MAX)
+		max = param ? param->max : UINT_MAX;
+		min = param ? param->min : 0;
+
+		if (*lvalp > max || *lvalp < min)
 			return -EINVAL;
-		WRITE_ONCE(*valp, *lvalp);
+
+		WRITE_ONCE(*valp, (unsigned int)*lvalp);
 	} else {
 		unsigned int val = READ_ONCE(*valp);
 		*lvalp = (unsigned long)val;
 	}
+
 	return 0;
 }
 
@@ -489,7 +534,7 @@ static int __do_proc_dointvec(void *tbl_data, const struct ctl_table *table,
 	left = *lenp;
 
 	if (!conv)
-		conv = do_proc_dointvec_conv;
+		conv = do_proc_dointvec_minmax_conv;
 
 	if (write) {
 		if (proc_first_pos_non_zero_ignore(ppos, table))
@@ -667,7 +712,7 @@ static int __do_proc_douintvec(void *tbl_data, const struct ctl_table *table,
 	}
 
 	if (!conv)
-		conv = do_proc_douintvec_conv;
+		conv = do_proc_douintvec_minmax_conv;
 
 	if (write)
 		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
@@ -762,7 +807,7 @@ int proc_douintvec(const struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos)
 {
 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
-				 do_proc_douintvec_conv, NULL);
+				 do_proc_douintvec_minmax_conv, NULL);
 }
 
 /*
@@ -808,46 +853,6 @@ static int proc_taint(const struct ctl_table *table, int write,
 	return err;
 }
 
-/**
- * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
- * @min: pointer to minimum allowable value
- * @max: pointer to maximum allowable value
- *
- * The do_proc_dointvec_minmax_conv_param structure provides the
- * minimum and maximum values for doing range checking for those sysctl
- * parameters that use the proc_dointvec_minmax() handler.
- */
-struct do_proc_dointvec_minmax_conv_param {
-	int *min;
-	int *max;
-};
-
-static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
-					int *valp,
-					int write, void *data)
-{
-	int tmp, ret;
-	struct do_proc_dointvec_minmax_conv_param *param = data;
-	/*
-	 * If writing, first do so via a temporary local int so we can
-	 * bounds-check it before touching *valp.
-	 */
-	int *ip = write ? &tmp : valp;
-
-	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
-	if (ret)
-		return ret;
-
-	if (write) {
-		if ((param->min && *param->min > tmp) ||
-		    (param->max && *param->max < tmp))
-			return -EINVAL;
-		WRITE_ONCE(*valp, tmp);
-	}
-
-	return 0;
-}
-
 /**
  * proc_dointvec_minmax - read a vector of integers with min/max values
  * @table: the sysctl table
@@ -867,53 +872,14 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
 int proc_dointvec_minmax(const struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct do_proc_dointvec_minmax_conv_param param = {
-		.min = (int *) table->extra1,
-		.max = (int *) table->extra2,
-	};
+	struct do_proc_dointvec_minmax_conv_param param;
+
+	param.min = (table->extra1) ? *(int *) table->extra1 : INT_MIN;
+	param.max = (table->extra2) ? *(int *) table->extra2 : INT_MAX;
 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
-/**
- * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
- * @min: pointer to minimum allowable value
- * @max: pointer to maximum allowable value
- *
- * The do_proc_douintvec_minmax_conv_param structure provides the
- * minimum and maximum values for doing range checking for those sysctl
- * parameters that use the proc_douintvec_minmax() handler.
- */
-struct do_proc_douintvec_minmax_conv_param {
-	unsigned int *min;
-	unsigned int *max;
-};
-
-static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
-					 unsigned int *valp,
-					 int write, void *data)
-{
-	int ret;
-	unsigned int tmp;
-	struct do_proc_douintvec_minmax_conv_param *param = data;
-	/* write via temporary local uint for bounds-checking */
-	unsigned int *up = write ? &tmp : valp;
-
-	ret = do_proc_douintvec_conv(lvalp, up, write, data);
-	if (ret)
-		return ret;
-
-	if (write) {
-		if ((param->min && *param->min > tmp) ||
-		    (param->max && *param->max < tmp))
-			return -ERANGE;
-
-		WRITE_ONCE(*valp, tmp);
-	}
-
-	return 0;
-}
-
 /**
  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
  * @table: the sysctl table
@@ -936,10 +902,11 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
 int proc_douintvec_minmax(const struct ctl_table *table, int write,
 			  void *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct do_proc_douintvec_minmax_conv_param param = {
-		.min = (unsigned int *) table->extra1,
-		.max = (unsigned int *) table->extra2,
-	};
+	struct do_proc_douintvec_minmax_conv_param param;
+
+	param.min = (table->extra1) ? *(unsigned int *) table->extra1 : 0;
+	param.max = (table->extra2) ? *(unsigned int *) table->extra2 : UINT_MAX;
+
 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 				 do_proc_douintvec_minmax_conv, &param);
 }
@@ -965,23 +932,17 @@ int proc_dou8vec_minmax(const struct ctl_table *table, int write,
 			void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table tmp;
-	unsigned int min = 0, max = 255U, val;
+	unsigned int val;
 	u8 *data = table->data;
-	struct do_proc_douintvec_minmax_conv_param param = {
-		.min = &min,
-		.max = &max,
-	};
+	struct do_proc_douintvec_minmax_conv_param param;
 	int res;
 
 	/* Do not support arrays yet. */
 	if (table->maxlen != sizeof(u8))
 		return -EINVAL;
 
-	if (table->extra1)
-		min = *(unsigned int *) table->extra1;
-	if (table->extra2)
-		max = *(unsigned int *) table->extra2;
-
+	param.min = (table->extra1) ? *(unsigned int *) table->extra1 : 0;
+	param.max = (table->extra2) ? *(unsigned int *) table->extra2 : 255U;
 	tmp = *table;
 
 	tmp.maxlen = sizeof(val);
@@ -1022,7 +983,7 @@ static int __do_proc_doulongvec_minmax(void *data,
 		void *buffer, size_t *lenp, loff_t *ppos,
 		unsigned long convmul, unsigned long convdiv)
 {
-	unsigned long *i, *min, *max;
+	unsigned long *i, min, max;
 	int vleft, first = 1, err = 0;
 	size_t left;
 	char *p;
@@ -1033,8 +994,9 @@ static int __do_proc_doulongvec_minmax(void *data,
 	}
 
 	i = data;
-	min = table->extra1;
-	max = table->extra2;
+	min = (table->extra1) ? *(unsigned long *) table->extra1 : 0;
+	max = (table->extra2) ? *(unsigned long *) table->extra2 : ULONG_MAX;
+
 	vleft = table->maxlen / sizeof(unsigned long);
 	left = *lenp;
 
@@ -1066,7 +1028,7 @@ static int __do_proc_doulongvec_minmax(void *data,
 			}
 
 			val = convmul * val / convdiv;
-			if ((min && val < *min) || (max && val > *max)) {
+			if ((val < min) || (val > max)) {
 				err = -EINVAL;
 				break;
 			}
@@ -1236,8 +1198,7 @@ static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lv
 		return ret;
 
 	if (write) {
-		if ((param->min && *param->min > tmp) ||
-				(param->max && *param->max < tmp))
+		if ((param->min > tmp) || (param->max < tmp))
 			return -EINVAL;
 		*valp = tmp;
 	}
@@ -1269,10 +1230,10 @@ int proc_dointvec_jiffies(const struct ctl_table *table, int write,
 int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
 			  void *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct do_proc_dointvec_minmax_conv_param param = {
-		.min = (int *) table->extra1,
-		.max = (int *) table->extra2,
-	};
+	struct do_proc_dointvec_minmax_conv_param param;
+
+	param.min = (table->extra1) ? *(int *) table->extra1 : INT_MIN;
+	param.max = (table->extra2) ? *(int *) table->extra2 : INT_MAX;
 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
 }
-- 
2.25.1
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 11 months ago
On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
> do_proc_dointvec_conv() used the default range of int type, while
> do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
> struct do_proc_dointvec_minmax_conv_param, which are actually passed
> in table->extra{1,2} pointers.
> 
> By changing the struct do_proc_dointvec_minmax_conv_param's
> "int * {min, max}" to "int {min, max}" and setting the min/max value
> via dereference the table->extra{1, 2}, those do_proc_dointvec_conv() and
> do_proc_dointvec_minmax_conv() functions can be merged into one and reduce
> code by one-third.
> 
> Similar changes were also made for do_proc_douintvec_minmax_conv_param,
> do_proc_douintvec_conv() and do_proc_douintvec_minmax_conv().
> 
Before moving forward, I need the motivation.

Please answer this:
*Why* are you pushing these two [1], [2] series?

Best

[1]
  * https://lore.kernel.org/tencent_A7FD9E9E75AA74A87583A427F5D20F988B09@qq.com
  * https://lore.kernel.org/tencent_C5E6023F97E7CC2A046AAEA09BC9ACF43907@qq.com
  * https://lore.kernel.org/cover.1726365007.git.wen.yang@linux.dev
  * https://lore.kernel.org/cover.1726910671.git.wen.yang@linux.dev

[2]
  * https://lore.kernel.org/20240714173858.52163-1-wen.yang@linux.dev
  * https://lore.kernel.org/20240715150223.54707-1-wen.yang@linux.dev
  * https://lore.kernel.org/20240905134818.4104-1-wen.yang@linux.dev
  * https://lore.kernel.org/20241201140058.5653-1-wen.yang@linux.dev

> Selftest were also made:
> 
> [23:16:38] ================ sysctl_test (11 subtests) =================
> [23:16:38] [PASSED] sysctl_test_api_dointvec_null_tbl_data
> [23:16:38] [PASSED] sysctl_test_api_dointvec_table_maxlen_unset
> [23:16:38] [PASSED] sysctl_test_api_dointvec_table_len_is_zero
> [23:16:38] [PASSED] sysctl_test_api_dointvec_table_read_but_position_set
> [23:16:38] [PASSED] sysctl_test_dointvec_read_happy_single_positive
> [23:16:38] [PASSED] sysctl_test_dointvec_read_happy_single_negative
> [23:16:38] [PASSED] sysctl_test_dointvec_write_happy_single_positive
> [23:16:38] [PASSED] sysctl_test_dointvec_write_happy_single_negative
> [23:16:38] [PASSED] sysctl_test_api_dointvec_write_single_less_int_min
> [23:16:38] [PASSED] sysctl_test_api_dointvec_write_single_greater_int_max
> [23:16:38] [PASSED] sysctl_test_register_sysctl_sz_invalid_extra_value
> [23:16:38] =================== [PASSED] sysctl_test ===================
> [23:16:38] ============================================================
> [23:16:38] Testing complete. Ran 11 tests: passed: 11
> 
> Signed-off-by: Wen Yang <wen.yang@linux.dev>
> Cc: Joel Granados <joel.granados@kernel.org>
> Cc: Luis Chamberlain <mcgrof@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Dave Young <dyoung@redhat.com>
> Cc: linux-kernel@vger.kernel.org
> ---
>  kernel/sysctl.c | 211 ++++++++++++++++++++----------------------------
>  1 file changed, 86 insertions(+), 125 deletions(-)
> 
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 7ae7a4136855..250dc9057259 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -424,22 +424,44 @@ static void proc_put_char(void **buf, size_t *size, char c)
>  	}
>  }
>  
> -static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
> -				 int *valp,
> -				 int write, void *data)
> +/**
> + * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
> + * @min: the minimum allowable value
> + * @max: the maximum allowable value
> + *
> + * The do_proc_dointvec_minmax_conv_param structure provides the
> + * minimum and maximum values for doing range checking for those sysctl
> + * parameters that use the proc_dointvec_minmax(), proc_dou8vec_minmax() and so on.
> + */
> +struct do_proc_dointvec_minmax_conv_param {
> +	int min;
> +	int max;
> +};
> +
> +static inline int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
> +		int *valp,
> +		int write, void *data)
>  {
> +	struct do_proc_dointvec_minmax_conv_param *param = data;
> +	long min, max;
> +	long val;
> +
>  	if (write) {
>  		if (*negp) {
> -			if (*lvalp > (unsigned long) INT_MAX + 1)
> -				return -EINVAL;
> -			WRITE_ONCE(*valp, -*lvalp);
> +			max = param ? param->max : 0;
> +			min = param ? param->min : INT_MIN;
> +			val = -*lvalp;
>  		} else {
> -			if (*lvalp > (unsigned long) INT_MAX)
> -				return -EINVAL;
> -			WRITE_ONCE(*valp, *lvalp);
> +			max = param ? param->max : INT_MAX;
> +			min = param ? param->min : 0;
> +			val = *lvalp;
>  		}
> +
> +		if (val > max || val < min)
> +			return -EINVAL;
> +		WRITE_ONCE(*valp, (int)val);
>  	} else {
> -		int val = READ_ONCE(*valp);
> +		val = (int)READ_ONCE(*valp);
>  		if (val < 0) {
>  			*negp = true;
>  			*lvalp = -(unsigned long)val;
> @@ -448,21 +470,44 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
>  			*lvalp = (unsigned long)val;
>  		}
>  	}
> +
>  	return 0;
>  }
>  
> -static int do_proc_douintvec_conv(unsigned long *lvalp,
> -				  unsigned int *valp,
> -				  int write, void *data)
> +/**
> + * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
> + * @min: minimum allowable value
> + * @max: maximum allowable value
> + *
> + * The do_proc_douintvec_minmax_conv_param structure provides the
> + * minimum and maximum values for doing range checking for those sysctl
> + * parameters that use the proc_douintvec_minmax() handler.
> + */
> +struct do_proc_douintvec_minmax_conv_param {
> +	unsigned int min;
> +	unsigned int max;
> +};
> +
> +static inline int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
> +					 unsigned int *valp,
> +					 int write, void *data)
>  {
> +	struct do_proc_douintvec_minmax_conv_param *param = data;
> +	unsigned long min, max;
> +
>  	if (write) {
> -		if (*lvalp > UINT_MAX)
> +		max = param ? param->max : UINT_MAX;
> +		min = param ? param->min : 0;
> +
> +		if (*lvalp > max || *lvalp < min)
>  			return -EINVAL;
> -		WRITE_ONCE(*valp, *lvalp);
> +
> +		WRITE_ONCE(*valp, (unsigned int)*lvalp);
>  	} else {
>  		unsigned int val = READ_ONCE(*valp);
>  		*lvalp = (unsigned long)val;
>  	}
> +
>  	return 0;
>  }
>  
> @@ -489,7 +534,7 @@ static int __do_proc_dointvec(void *tbl_data, const struct ctl_table *table,
>  	left = *lenp;
>  
>  	if (!conv)
> -		conv = do_proc_dointvec_conv;
> +		conv = do_proc_dointvec_minmax_conv;
>  
>  	if (write) {
>  		if (proc_first_pos_non_zero_ignore(ppos, table))
> @@ -667,7 +712,7 @@ static int __do_proc_douintvec(void *tbl_data, const struct ctl_table *table,
>  	}
>  
>  	if (!conv)
> -		conv = do_proc_douintvec_conv;
> +		conv = do_proc_douintvec_minmax_conv;
>  
>  	if (write)
>  		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
> @@ -762,7 +807,7 @@ int proc_douintvec(const struct ctl_table *table, int write, void *buffer,
>  		size_t *lenp, loff_t *ppos)
>  {
>  	return do_proc_douintvec(table, write, buffer, lenp, ppos,
> -				 do_proc_douintvec_conv, NULL);
> +				 do_proc_douintvec_minmax_conv, NULL);
>  }
>  
>  /*
> @@ -808,46 +853,6 @@ static int proc_taint(const struct ctl_table *table, int write,
>  	return err;
>  }
>  
> -/**
> - * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
> - * @min: pointer to minimum allowable value
> - * @max: pointer to maximum allowable value
> - *
> - * The do_proc_dointvec_minmax_conv_param structure provides the
> - * minimum and maximum values for doing range checking for those sysctl
> - * parameters that use the proc_dointvec_minmax() handler.
> - */
> -struct do_proc_dointvec_minmax_conv_param {
> -	int *min;
> -	int *max;
> -};
> -
> -static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
> -					int *valp,
> -					int write, void *data)
> -{
> -	int tmp, ret;
> -	struct do_proc_dointvec_minmax_conv_param *param = data;
> -	/*
> -	 * If writing, first do so via a temporary local int so we can
> -	 * bounds-check it before touching *valp.
> -	 */
> -	int *ip = write ? &tmp : valp;
> -
> -	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
> -	if (ret)
> -		return ret;
> -
> -	if (write) {
> -		if ((param->min && *param->min > tmp) ||
> -		    (param->max && *param->max < tmp))
> -			return -EINVAL;
> -		WRITE_ONCE(*valp, tmp);
> -	}
> -
> -	return 0;
> -}
> -
>  /**
>   * proc_dointvec_minmax - read a vector of integers with min/max values
>   * @table: the sysctl table
> @@ -867,53 +872,14 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
>  int proc_dointvec_minmax(const struct ctl_table *table, int write,
>  		  void *buffer, size_t *lenp, loff_t *ppos)
>  {
> -	struct do_proc_dointvec_minmax_conv_param param = {
> -		.min = (int *) table->extra1,
> -		.max = (int *) table->extra2,
> -	};
> +	struct do_proc_dointvec_minmax_conv_param param;
> +
> +	param.min = (table->extra1) ? *(int *) table->extra1 : INT_MIN;
> +	param.max = (table->extra2) ? *(int *) table->extra2 : INT_MAX;
>  	return do_proc_dointvec(table, write, buffer, lenp, ppos,
>  				do_proc_dointvec_minmax_conv, &param);
>  }
>  
> -/**
> - * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
> - * @min: pointer to minimum allowable value
> - * @max: pointer to maximum allowable value
> - *
> - * The do_proc_douintvec_minmax_conv_param structure provides the
> - * minimum and maximum values for doing range checking for those sysctl
> - * parameters that use the proc_douintvec_minmax() handler.
> - */
> -struct do_proc_douintvec_minmax_conv_param {
> -	unsigned int *min;
> -	unsigned int *max;
> -};
> -
> -static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
> -					 unsigned int *valp,
> -					 int write, void *data)
> -{
> -	int ret;
> -	unsigned int tmp;
> -	struct do_proc_douintvec_minmax_conv_param *param = data;
> -	/* write via temporary local uint for bounds-checking */
> -	unsigned int *up = write ? &tmp : valp;
> -
> -	ret = do_proc_douintvec_conv(lvalp, up, write, data);
> -	if (ret)
> -		return ret;
> -
> -	if (write) {
> -		if ((param->min && *param->min > tmp) ||
> -		    (param->max && *param->max < tmp))
> -			return -ERANGE;
> -
> -		WRITE_ONCE(*valp, tmp);
> -	}
> -
> -	return 0;
> -}
> -
>  /**
>   * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
>   * @table: the sysctl table
> @@ -936,10 +902,11 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
>  int proc_douintvec_minmax(const struct ctl_table *table, int write,
>  			  void *buffer, size_t *lenp, loff_t *ppos)
>  {
> -	struct do_proc_douintvec_minmax_conv_param param = {
> -		.min = (unsigned int *) table->extra1,
> -		.max = (unsigned int *) table->extra2,
> -	};
> +	struct do_proc_douintvec_minmax_conv_param param;
> +
> +	param.min = (table->extra1) ? *(unsigned int *) table->extra1 : 0;
> +	param.max = (table->extra2) ? *(unsigned int *) table->extra2 : UINT_MAX;
> +
>  	return do_proc_douintvec(table, write, buffer, lenp, ppos,
>  				 do_proc_douintvec_minmax_conv, &param);
>  }
> @@ -965,23 +932,17 @@ int proc_dou8vec_minmax(const struct ctl_table *table, int write,
>  			void *buffer, size_t *lenp, loff_t *ppos)
>  {
>  	struct ctl_table tmp;
> -	unsigned int min = 0, max = 255U, val;
> +	unsigned int val;
>  	u8 *data = table->data;
> -	struct do_proc_douintvec_minmax_conv_param param = {
> -		.min = &min,
> -		.max = &max,
> -	};
> +	struct do_proc_douintvec_minmax_conv_param param;
>  	int res;
>  
>  	/* Do not support arrays yet. */
>  	if (table->maxlen != sizeof(u8))
>  		return -EINVAL;
>  
> -	if (table->extra1)
> -		min = *(unsigned int *) table->extra1;
> -	if (table->extra2)
> -		max = *(unsigned int *) table->extra2;
> -
> +	param.min = (table->extra1) ? *(unsigned int *) table->extra1 : 0;
> +	param.max = (table->extra2) ? *(unsigned int *) table->extra2 : 255U;
>  	tmp = *table;
>  
>  	tmp.maxlen = sizeof(val);
> @@ -1022,7 +983,7 @@ static int __do_proc_doulongvec_minmax(void *data,
>  		void *buffer, size_t *lenp, loff_t *ppos,
>  		unsigned long convmul, unsigned long convdiv)
>  {
> -	unsigned long *i, *min, *max;
> +	unsigned long *i, min, max;
>  	int vleft, first = 1, err = 0;
>  	size_t left;
>  	char *p;
> @@ -1033,8 +994,9 @@ static int __do_proc_doulongvec_minmax(void *data,
>  	}
>  
>  	i = data;
> -	min = table->extra1;
> -	max = table->extra2;
> +	min = (table->extra1) ? *(unsigned long *) table->extra1 : 0;
> +	max = (table->extra2) ? *(unsigned long *) table->extra2 : ULONG_MAX;
> +
>  	vleft = table->maxlen / sizeof(unsigned long);
>  	left = *lenp;
>  
> @@ -1066,7 +1028,7 @@ static int __do_proc_doulongvec_minmax(void *data,
>  			}
>  
>  			val = convmul * val / convdiv;
> -			if ((min && val < *min) || (max && val > *max)) {
> +			if ((val < min) || (val > max)) {
>  				err = -EINVAL;
>  				break;
>  			}
> @@ -1236,8 +1198,7 @@ static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lv
>  		return ret;
>  
>  	if (write) {
> -		if ((param->min && *param->min > tmp) ||
> -				(param->max && *param->max < tmp))
> +		if ((param->min > tmp) || (param->max < tmp))
>  			return -EINVAL;
>  		*valp = tmp;
>  	}
> @@ -1269,10 +1230,10 @@ int proc_dointvec_jiffies(const struct ctl_table *table, int write,
>  int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
>  			  void *buffer, size_t *lenp, loff_t *ppos)
>  {
> -	struct do_proc_dointvec_minmax_conv_param param = {
> -		.min = (int *) table->extra1,
> -		.max = (int *) table->extra2,
> -	};
> +	struct do_proc_dointvec_minmax_conv_param param;
> +
> +	param.min = (table->extra1) ? *(int *) table->extra1 : INT_MIN;
> +	param.max = (table->extra2) ? *(int *) table->extra2 : INT_MAX;
>  	return do_proc_dointvec(table, write, buffer, lenp, ppos,
>  			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
>  }
> -- 
> 2.25.1
> 

-- 

Joel Granados
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Wen Yang 11 months ago

On 2025/1/16 17:37, Joel Granados wrote:
> On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
>> do_proc_dointvec_conv() used the default range of int type, while
>> do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
>> struct do_proc_dointvec_minmax_conv_param, which are actually passed
>> in table->extra{1,2} pointers.
>>
>> By changing the struct do_proc_dointvec_minmax_conv_param's
>> "int * {min, max}" to "int {min, max}" and setting the min/max value
>> via dereference the table->extra{1, 2}, those do_proc_dointvec_conv() and
>> do_proc_dointvec_minmax_conv() functions can be merged into one and reduce
>> code by one-third.
>>
>> Similar changes were also made for do_proc_douintvec_minmax_conv_param,
>> do_proc_douintvec_conv() and do_proc_douintvec_minmax_conv().
>>
> Before moving forward, I need the motivation.
> 
> Please answer this:
> *Why* are you pushing these two [1], [2] series?
> 
> Best
> 
> [1]
>    * https://lore.kernel.org/tencent_A7FD9E9E75AA74A87583A427F5D20F988B09@qq.com
>    * https://lore.kernel.org/tencent_C5E6023F97E7CC2A046AAEA09BC9ACF43907@qq.com
>    * https://lore.kernel.org/cover.1726365007.git.wen.yang@linux.dev
>    * https://lore.kernel.org/cover.1726910671.git.wen.yang@linux.dev
> 
> [2]
>    * https://lore.kernel.org/20240714173858.52163-1-wen.yang@linux.dev
>    * https://lore.kernel.org/20240715150223.54707-1-wen.yang@linux.dev
>    * https://lore.kernel.org/20240905134818.4104-1-wen.yang@linux.dev
>    * https://lore.kernel.org/20241201140058.5653-1-wen.yang@linux.dev
> 

Thank you for your comments.

Here is a brief report about it.

The boundary check of sysctl uses .extra{1, 2} pointers, which need to 
point to constant variables (such as two_five_five, n_65535, ue_int_max, 
etc. that appear in multiple modules).

We first try to stuff these variables into the shared const arrays ( 
(sysctl_vals, sysctl_long_vals) to save memory, as shown in series 1.

Thank Eric for pointing out:
- You can still save a lot more by turning .extra1 and .extra2
- into longs instead of keeping them as pointers and needing
- constants to be pointed at somewhere.

We have further found more detailed information about "kill the shared 
const array":

https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org

67ff32289 ("proc_sysctl: update docs for __register_sysctl_table()")


So we tried some ways to achieve it, as shown in series 2.

Step 1: The extra{1,2} pointer is used as {min, max} pointers in the 
do_proc_do{int, uint}vec_minmax_conv_param structures. By changing the 
{min, max} pointers to numeric values, the code is simplified and 
preparations are made for the second step.

Step 2: Add some sysctl helper functions to avoid direct access to
table->extra1/extra2, and also support encoding values directly in the 
table entry.

https://lore.kernel.org/all/0f4a5233b75e6484a6236d85d2b506c96ea41ef1.1726910671.git.wen.yang@linux.dev/

https://lore.kernel.org/all/a086609632328c2feee69b10067e43115885b2ae.1726910671.git.wen.yang@linux.dev/


Step 3: gradually remove the extra constant variables in multiple modules.

https://lore.kernel.org/all/5f753895a5f31cac65ddeb56b58fc17553785163.1726910671.git.wen.yang@linux.dev/

https://lore.kernel.org/all/20d67551ed7fa9c774d2b128ad9bc298a0a55c9d.1726910671.git.wen.yang@linux.dev/



--
Best wishes,
Wen




>> Selftest were also made:
>>
>> [23:16:38] ================ sysctl_test (11 subtests) =================
>> [23:16:38] [PASSED] sysctl_test_api_dointvec_null_tbl_data
>> [23:16:38] [PASSED] sysctl_test_api_dointvec_table_maxlen_unset
>> [23:16:38] [PASSED] sysctl_test_api_dointvec_table_len_is_zero
>> [23:16:38] [PASSED] sysctl_test_api_dointvec_table_read_but_position_set
>> [23:16:38] [PASSED] sysctl_test_dointvec_read_happy_single_positive
>> [23:16:38] [PASSED] sysctl_test_dointvec_read_happy_single_negative
>> [23:16:38] [PASSED] sysctl_test_dointvec_write_happy_single_positive
>> [23:16:38] [PASSED] sysctl_test_dointvec_write_happy_single_negative
>> [23:16:38] [PASSED] sysctl_test_api_dointvec_write_single_less_int_min
>> [23:16:38] [PASSED] sysctl_test_api_dointvec_write_single_greater_int_max
>> [23:16:38] [PASSED] sysctl_test_register_sysctl_sz_invalid_extra_value
>> [23:16:38] =================== [PASSED] sysctl_test ===================
>> [23:16:38] ============================================================
>> [23:16:38] Testing complete. Ran 11 tests: passed: 11
>>
>> Signed-off-by: Wen Yang <wen.yang@linux.dev>
>> Cc: Joel Granados <joel.granados@kernel.org>
>> Cc: Luis Chamberlain <mcgrof@kernel.org>
>> Cc: Kees Cook <keescook@chromium.org>
>> Cc: Eric W. Biederman <ebiederm@xmission.com>
>> Cc: Christian Brauner <brauner@kernel.org>
>> Cc: Dave Young <dyoung@redhat.com>
>> Cc: linux-kernel@vger.kernel.org
>> ---
>>   kernel/sysctl.c | 211 ++++++++++++++++++++----------------------------
>>   1 file changed, 86 insertions(+), 125 deletions(-)
>>
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index 7ae7a4136855..250dc9057259 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -424,22 +424,44 @@ static void proc_put_char(void **buf, size_t *size, char c)
>>   	}
>>   }
>>   
>> -static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
>> -				 int *valp,
>> -				 int write, void *data)
>> +/**
>> + * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
>> + * @min: the minimum allowable value
>> + * @max: the maximum allowable value
>> + *
>> + * The do_proc_dointvec_minmax_conv_param structure provides the
>> + * minimum and maximum values for doing range checking for those sysctl
>> + * parameters that use the proc_dointvec_minmax(), proc_dou8vec_minmax() and so on.
>> + */
>> +struct do_proc_dointvec_minmax_conv_param {
>> +	int min;
>> +	int max;
>> +};
>> +
>> +static inline int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
>> +		int *valp,
>> +		int write, void *data)
>>   {
>> +	struct do_proc_dointvec_minmax_conv_param *param = data;
>> +	long min, max;
>> +	long val;
>> +
>>   	if (write) {
>>   		if (*negp) {
>> -			if (*lvalp > (unsigned long) INT_MAX + 1)
>> -				return -EINVAL;
>> -			WRITE_ONCE(*valp, -*lvalp);
>> +			max = param ? param->max : 0;
>> +			min = param ? param->min : INT_MIN;
>> +			val = -*lvalp;
>>   		} else {
>> -			if (*lvalp > (unsigned long) INT_MAX)
>> -				return -EINVAL;
>> -			WRITE_ONCE(*valp, *lvalp);
>> +			max = param ? param->max : INT_MAX;
>> +			min = param ? param->min : 0;
>> +			val = *lvalp;
>>   		}
>> +
>> +		if (val > max || val < min)
>> +			return -EINVAL;
>> +		WRITE_ONCE(*valp, (int)val);
>>   	} else {
>> -		int val = READ_ONCE(*valp);
>> +		val = (int)READ_ONCE(*valp);
>>   		if (val < 0) {
>>   			*negp = true;
>>   			*lvalp = -(unsigned long)val;
>> @@ -448,21 +470,44 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
>>   			*lvalp = (unsigned long)val;
>>   		}
>>   	}
>> +
>>   	return 0;
>>   }
>>   
>> -static int do_proc_douintvec_conv(unsigned long *lvalp,
>> -				  unsigned int *valp,
>> -				  int write, void *data)
>> +/**
>> + * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
>> + * @min: minimum allowable value
>> + * @max: maximum allowable value
>> + *
>> + * The do_proc_douintvec_minmax_conv_param structure provides the
>> + * minimum and maximum values for doing range checking for those sysctl
>> + * parameters that use the proc_douintvec_minmax() handler.
>> + */
>> +struct do_proc_douintvec_minmax_conv_param {
>> +	unsigned int min;
>> +	unsigned int max;
>> +};
>> +
>> +static inline int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
>> +					 unsigned int *valp,
>> +					 int write, void *data)
>>   {
>> +	struct do_proc_douintvec_minmax_conv_param *param = data;
>> +	unsigned long min, max;
>> +
>>   	if (write) {
>> -		if (*lvalp > UINT_MAX)
>> +		max = param ? param->max : UINT_MAX;
>> +		min = param ? param->min : 0;
>> +
>> +		if (*lvalp > max || *lvalp < min)
>>   			return -EINVAL;
>> -		WRITE_ONCE(*valp, *lvalp);
>> +
>> +		WRITE_ONCE(*valp, (unsigned int)*lvalp);
>>   	} else {
>>   		unsigned int val = READ_ONCE(*valp);
>>   		*lvalp = (unsigned long)val;
>>   	}
>> +
>>   	return 0;
>>   }
>>   
>> @@ -489,7 +534,7 @@ static int __do_proc_dointvec(void *tbl_data, const struct ctl_table *table,
>>   	left = *lenp;
>>   
>>   	if (!conv)
>> -		conv = do_proc_dointvec_conv;
>> +		conv = do_proc_dointvec_minmax_conv;
>>   
>>   	if (write) {
>>   		if (proc_first_pos_non_zero_ignore(ppos, table))
>> @@ -667,7 +712,7 @@ static int __do_proc_douintvec(void *tbl_data, const struct ctl_table *table,
>>   	}
>>   
>>   	if (!conv)
>> -		conv = do_proc_douintvec_conv;
>> +		conv = do_proc_douintvec_minmax_conv;
>>   
>>   	if (write)
>>   		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
>> @@ -762,7 +807,7 @@ int proc_douintvec(const struct ctl_table *table, int write, void *buffer,
>>   		size_t *lenp, loff_t *ppos)
>>   {
>>   	return do_proc_douintvec(table, write, buffer, lenp, ppos,
>> -				 do_proc_douintvec_conv, NULL);
>> +				 do_proc_douintvec_minmax_conv, NULL);
>>   }
>>   
>>   /*
>> @@ -808,46 +853,6 @@ static int proc_taint(const struct ctl_table *table, int write,
>>   	return err;
>>   }
>>   
>> -/**
>> - * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
>> - * @min: pointer to minimum allowable value
>> - * @max: pointer to maximum allowable value
>> - *
>> - * The do_proc_dointvec_minmax_conv_param structure provides the
>> - * minimum and maximum values for doing range checking for those sysctl
>> - * parameters that use the proc_dointvec_minmax() handler.
>> - */
>> -struct do_proc_dointvec_minmax_conv_param {
>> -	int *min;
>> -	int *max;
>> -};
>> -
>> -static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
>> -					int *valp,
>> -					int write, void *data)
>> -{
>> -	int tmp, ret;
>> -	struct do_proc_dointvec_minmax_conv_param *param = data;
>> -	/*
>> -	 * If writing, first do so via a temporary local int so we can
>> -	 * bounds-check it before touching *valp.
>> -	 */
>> -	int *ip = write ? &tmp : valp;
>> -
>> -	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
>> -	if (ret)
>> -		return ret;
>> -
>> -	if (write) {
>> -		if ((param->min && *param->min > tmp) ||
>> -		    (param->max && *param->max < tmp))
>> -			return -EINVAL;
>> -		WRITE_ONCE(*valp, tmp);
>> -	}
>> -
>> -	return 0;
>> -}
>> -
>>   /**
>>    * proc_dointvec_minmax - read a vector of integers with min/max values
>>    * @table: the sysctl table
>> @@ -867,53 +872,14 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
>>   int proc_dointvec_minmax(const struct ctl_table *table, int write,
>>   		  void *buffer, size_t *lenp, loff_t *ppos)
>>   {
>> -	struct do_proc_dointvec_minmax_conv_param param = {
>> -		.min = (int *) table->extra1,
>> -		.max = (int *) table->extra2,
>> -	};
>> +	struct do_proc_dointvec_minmax_conv_param param;
>> +
>> +	param.min = (table->extra1) ? *(int *) table->extra1 : INT_MIN;
>> +	param.max = (table->extra2) ? *(int *) table->extra2 : INT_MAX;
>>   	return do_proc_dointvec(table, write, buffer, lenp, ppos,
>>   				do_proc_dointvec_minmax_conv, &param);
>>   }
>>   
>> -/**
>> - * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
>> - * @min: pointer to minimum allowable value
>> - * @max: pointer to maximum allowable value
>> - *
>> - * The do_proc_douintvec_minmax_conv_param structure provides the
>> - * minimum and maximum values for doing range checking for those sysctl
>> - * parameters that use the proc_douintvec_minmax() handler.
>> - */
>> -struct do_proc_douintvec_minmax_conv_param {
>> -	unsigned int *min;
>> -	unsigned int *max;
>> -};
>> -
>> -static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
>> -					 unsigned int *valp,
>> -					 int write, void *data)
>> -{
>> -	int ret;
>> -	unsigned int tmp;
>> -	struct do_proc_douintvec_minmax_conv_param *param = data;
>> -	/* write via temporary local uint for bounds-checking */
>> -	unsigned int *up = write ? &tmp : valp;
>> -
>> -	ret = do_proc_douintvec_conv(lvalp, up, write, data);
>> -	if (ret)
>> -		return ret;
>> -
>> -	if (write) {
>> -		if ((param->min && *param->min > tmp) ||
>> -		    (param->max && *param->max < tmp))
>> -			return -ERANGE;
>> -
>> -		WRITE_ONCE(*valp, tmp);
>> -	}
>> -
>> -	return 0;
>> -}
>> -
>>   /**
>>    * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
>>    * @table: the sysctl table
>> @@ -936,10 +902,11 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
>>   int proc_douintvec_minmax(const struct ctl_table *table, int write,
>>   			  void *buffer, size_t *lenp, loff_t *ppos)
>>   {
>> -	struct do_proc_douintvec_minmax_conv_param param = {
>> -		.min = (unsigned int *) table->extra1,
>> -		.max = (unsigned int *) table->extra2,
>> -	};
>> +	struct do_proc_douintvec_minmax_conv_param param;
>> +
>> +	param.min = (table->extra1) ? *(unsigned int *) table->extra1 : 0;
>> +	param.max = (table->extra2) ? *(unsigned int *) table->extra2 : UINT_MAX;
>> +
>>   	return do_proc_douintvec(table, write, buffer, lenp, ppos,
>>   				 do_proc_douintvec_minmax_conv, &param);
>>   }
>> @@ -965,23 +932,17 @@ int proc_dou8vec_minmax(const struct ctl_table *table, int write,
>>   			void *buffer, size_t *lenp, loff_t *ppos)
>>   {
>>   	struct ctl_table tmp;
>> -	unsigned int min = 0, max = 255U, val;
>> +	unsigned int val;
>>   	u8 *data = table->data;
>> -	struct param = {
>> -		.min = &min,
>> -		.max = &max,
>> -	};
>> +	struct do_proc_douintvec_minmax_conv_param param;
>>   	int res;
>>   
>>   	/* Do not support arrays yet. */
>>   	if (table->maxlen != sizeof(u8))
>>   		return -EINVAL;
>>   
>> -	if (table->extra1)
>> -		min = *(unsigned int *) table->extra1;
>> -	if (table->extra2)
>> -		max = *(unsigned int *) table->extra2;
>> -
>> +	param.min = (table->extra1) ? *(unsigned int *) table->extra1 : 0;
>> +	param.max = (table->extra2) ? *(unsigned int *) table->extra2 : 255U;
>>   	tmp = *table;
>>   
>>   	tmp.maxlen = sizeof(val);
>> @@ -1022,7 +983,7 @@ static int __do_proc_doulongvec_minmax(void *data,
>>   		void *buffer, size_t *lenp, loff_t *ppos,
>>   		unsigned long convmul, unsigned long convdiv)
>>   {
>> -	unsigned long *i, *min, *max;
>> +	unsigned long *i, min, max;
>>   	int vleft, first = 1, err = 0;
>>   	size_t left;
>>   	char *p;
>> @@ -1033,8 +994,9 @@ static int __do_proc_doulongvec_minmax(void *data,
>>   	}
>>   
>>   	i = data;
>> -	min = table->extra1;
>> -	max = table->extra2;
>> +	min = (table->extra1) ? *(unsigned long *) table->extra1 : 0;
>> +	max = (table->extra2) ? *(unsigned long *) table->extra2 : ULONG_MAX;
>> +
>>   	vleft = table->maxlen / sizeof(unsigned long);
>>   	left = *lenp;
>>   
>> @@ -1066,7 +1028,7 @@ static int __do_proc_doulongvec_minmax(void *data,
>>   			}
>>   
>>   			val = convmul * val / convdiv;
>> -			if ((min && val < *min) || (max && val > *max)) {
>> +			if ((val < min) || (val > max)) {
>>   				err = -EINVAL;
>>   				break;
>>   			}
>> @@ -1236,8 +1198,7 @@ static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lv
>>   		return ret;
>>   
>>   	if (write) {
>> -		if ((param->min && *param->min > tmp) ||
>> -				(param->max && *param->max < tmp))
>> +		if ((param->min > tmp) || (param->max < tmp))
>>   			return -EINVAL;
>>   		*valp = tmp;
>>   	}
>> @@ -1269,10 +1230,10 @@ int proc_dointvec_jiffies(const struct ctl_table *table, int write,
>>   int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
>>   			  void *buffer, size_t *lenp, loff_t *ppos)
>>   {
>> -	struct do_proc_dointvec_minmax_conv_param param = {
>> -		.min = (int *) table->extra1,
>> -		.max = (int *) table->extra2,
>> -	};
>> +	struct do_proc_dointvec_minmax_conv_param param;
>> +
>> +	param.min = (table->extra1) ? *(int *) table->extra1 : INT_MIN;
>> +	param.max = (table->extra2) ? *(int *) table->extra2 : INT_MAX;
>>   	return do_proc_dointvec(table, write, buffer, lenp, ppos,
>>   			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
>>   }
>> -- 
>> 2.25.1
>>
>
Re: Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 10 months, 4 weeks ago
On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
> 
> 
> On 2025/1/16 17:37, Joel Granados wrote:
> > On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
> > > do_proc_dointvec_conv() used the default range of int type, while
> > > do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
> > > struct do_proc_dointvec_minmax_conv_param, which are actually passed
> > > in table->extra{1,2} pointers.
> > > 
> > > By changing the struct do_proc_dointvec_minmax_conv_param's
> > > "int * {min, max}" to "int {min, max}" and setting the min/max value
> > > via dereference the table->extra{1, 2}, those do_proc_dointvec_conv() and
> > > do_proc_dointvec_minmax_conv() functions can be merged into one and reduce
> > > code by one-third.
> > > 
> > > Similar changes were also made for do_proc_douintvec_minmax_conv_param,
> > > do_proc_douintvec_conv() and do_proc_douintvec_minmax_conv().
> > > 
> > Before moving forward, I need the motivation.
> > 
> > Please answer this:
> > *Why* are you pushing these two [1], [2] series?
> > 
> > Best
> > 
> > [1]
> >    * https://lore.kernel.org/tencent_A7FD9E9E75AA74A87583A427F5D20F988B09@qq.com
> >    * https://lore.kernel.org/tencent_C5E6023F97E7CC2A046AAEA09BC9ACF43907@qq.com
> >    * https://lore.kernel.org/cover.1726365007.git.wen.yang@linux.dev
> >    * https://lore.kernel.org/cover.1726910671.git.wen.yang@linux.dev
> > 
> > [2]
> >    * https://lore.kernel.org/20240714173858.52163-1-wen.yang@linux.dev
> >    * https://lore.kernel.org/20240715150223.54707-1-wen.yang@linux.dev
> >    * https://lore.kernel.org/20240905134818.4104-1-wen.yang@linux.dev
> >    * https://lore.kernel.org/20241201140058.5653-1-wen.yang@linux.dev
> > 
> 
> Thank you for your comments.
> 
> Here is a brief report about it.
> 
> The boundary check of sysctl uses .extra{1, 2} pointers, which need to point
> to constant variables (such as two_five_five, n_65535, ue_int_max, etc. that
> appear in multiple modules).
> 
> We first try to stuff these variables into the shared const arrays (
> (sysctl_vals, sysctl_long_vals) to save memory, as shown in series 1.
> 
> Thank Eric for pointing out:
> - You can still save a lot more by turning .extra1 and .extra2
> - into longs instead of keeping them as pointers and needing
> - constants to be pointed at somewhere.
> 
> We have further found more detailed information about "kill the shared const
> array":
> 
> https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
> 
> 67ff32289 ("proc_sysctl: update docs for __register_sysctl_table()")

Hey Wen Yang

Thx for that additional explanation.

So I understand that you want to get rid of the sysctl_vals shared const
array by replacing the extra{1,2} from void* to long; all this to *save
memory*. Right (please correct me if I have misunderstood you)?

I don't think that by doing this replacement you will save much memory
(if any). And this is why:
1. The long and the void* are most likely (depending on arch?) the same
   size.
2. In [1] it is mentioned that, we would still need an extra (void*) to
   address the sysctl tables that are *NOT* using extra{1,2} as min max.
   This means that we need a bigger ctl_table (long extra1, long extra2
   and void* extra). We will need *more* memory?

I would like to be proven wrong. So this is my proposal: Instead of
trying to do an incremental change, I suggest you remove the sysctl_vals
shared const array and measure how much memory you actually save. You
can use the ./scripts/bloat-o-meter in the linux kernel source and
follow something similar to what we did in [2] to measure how much
memory we are actually talking about.

Once you get a hard number, then we can move forward on the memory
saving front.

Best

[1] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
[2] https://lore.kernel.org/all/20240501-jag-sysctl_remset_net-v6-0-370b702b6b4a@samsung.com/
-- 

Joel Granados
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Eric W. Biederman 10 months, 3 weeks ago
Joel Granados <joel.granados@kernel.org> writes:

> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
>> 
>> 
>> On 2025/1/16 17:37, Joel Granados wrote:
>> > On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
>> > > do_proc_dointvec_conv() used the default range of int type, while
>> > > do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
>> > > struct do_proc_dointvec_minmax_conv_param, which are actually passed
>> > > in table->extra{1,2} pointers.
>> > > 
>> > > By changing the struct do_proc_dointvec_minmax_conv_param's
>> > > "int * {min, max}" to "int {min, max}" and setting the min/max value
>> > > via dereference the table->extra{1, 2}, those do_proc_dointvec_conv() and
>> > > do_proc_dointvec_minmax_conv() functions can be merged into one and reduce
>> > > code by one-third.
>> > > 
>> > > Similar changes were also made for do_proc_douintvec_minmax_conv_param,
>> > > do_proc_douintvec_conv() and do_proc_douintvec_minmax_conv().
>> > > 
>> > Before moving forward, I need the motivation.
>> > 
>> > Please answer this:
>> > *Why* are you pushing these two [1], [2] series?
>> > 
>> > Best
>> > 
>> > [1]
>> >    * https://lore.kernel.org/tencent_A7FD9E9E75AA74A87583A427F5D20F988B09@qq.com
>> >    * https://lore.kernel.org/tencent_C5E6023F97E7CC2A046AAEA09BC9ACF43907@qq.com
>> >    * https://lore.kernel.org/cover.1726365007.git.wen.yang@linux.dev
>> >    * https://lore.kernel.org/cover.1726910671.git.wen.yang@linux.dev
>> > 
>> > [2]
>> >    * https://lore.kernel.org/20240714173858.52163-1-wen.yang@linux.dev
>> >    * https://lore.kernel.org/20240715150223.54707-1-wen.yang@linux.dev
>> >    * https://lore.kernel.org/20240905134818.4104-1-wen.yang@linux.dev
>> >    * https://lore.kernel.org/20241201140058.5653-1-wen.yang@linux.dev
>> > 
>> 
>> Thank you for your comments.
>> 
>> Here is a brief report about it.
>> 
>> The boundary check of sysctl uses .extra{1, 2} pointers, which need to point
>> to constant variables (such as two_five_five, n_65535, ue_int_max, etc. that
>> appear in multiple modules).
>> 
>> We first try to stuff these variables into the shared const arrays (
>> (sysctl_vals, sysctl_long_vals) to save memory, as shown in series 1.
>> 
>> Thank Eric for pointing out:
>> - You can still save a lot more by turning .extra1 and .extra2
>> - into longs instead of keeping them as pointers and needing
>> - constants to be pointed at somewhere.
>> 
>> We have further found more detailed information about "kill the shared const
>> array":
>> 
>> https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
>> 
>> 67ff32289 ("proc_sysctl: update docs for __register_sysctl_table()")
>
> Hey Wen Yang
>
> Thx for that additional explanation.
>
> So I understand that you want to get rid of the sysctl_vals shared const
> array by replacing the extra{1,2} from void* to long; all this to *save
> memory*. Right (please correct me if I have misunderstood you)?
>
> I don't think that by doing this replacement you will save much memory
> (if any). And this is why:
> 1. The long and the void* are most likely (depending on arch?) the same
>    size.
> 2. In [1] it is mentioned that, we would still need an extra (void*) to
>    address the sysctl tables that are *NOT* using extra{1,2} as min max.
>    This means that we need a bigger ctl_table (long extra1, long extra2
>    and void* extra). We will need *more* memory?
>
> I would like to be proven wrong. So this is my proposal: Instead of
> trying to do an incremental change, I suggest you remove the sysctl_vals
> shared const array and measure how much memory you actually save. You
> can use the ./scripts/bloat-o-meter in the linux kernel source and
> follow something similar to what we did in [2] to measure how much
> memory we are actually talking about.
>
> Once you get a hard number, then we can move forward on the memory
> saving front.

When I originally suggested this my motivation had nothing to do with
memory.  The sysctl_vals memory array is type unsafe and has actively
lead to real world bugs.  AKA longs and int confusion.  One example is
that SYSCTL_ZERO does not properly work as a minimum to
proc_do_ulongvec_minmax.

Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
scary to work with.

So I suggested please making everything simpler by putting unsigned long
min and max in to struct ctl_table and then getting rid of extra1 and
extra2.  As extra1 and extra2 are almost exclusively used to implement
min and max.

The size would not change but the chance of making a hard to find and
understand mistake would go down dramatically.

Eric
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Eric W. Biederman 10 months, 3 weeks ago
"Eric W. Biederman" <ebiederm@xmission.com> writes:

> Joel Granados <joel.granados@kernel.org> writes:
>
>> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
>>> 
>>> 
>>> On 2025/1/16 17:37, Joel Granados wrote:
>>> > On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
>>> > > do_proc_dointvec_conv() used the default range of int type, while
>>> > > do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
>>> > > struct do_proc_dointvec_minmax_conv_param, which are actually passed
>>> > > in table->extra{1,2} pointers.
>>> > > 
>>> > > By changing the struct do_proc_dointvec_minmax_conv_param's
>>> > > "int * {min, max}" to "int {min, max}" and setting the min/max value
>>> > > via dereference the table->extra{1, 2}, those do_proc_dointvec_conv() and
>>> > > do_proc_dointvec_minmax_conv() functions can be merged into one and reduce
>>> > > code by one-third.
>>> > > 
>>> > > Similar changes were also made for do_proc_douintvec_minmax_conv_param,
>>> > > do_proc_douintvec_conv() and do_proc_douintvec_minmax_conv().
>>> > > 
>>> > Before moving forward, I need the motivation.
>>> > 
>>> > Please answer this:
>>> > *Why* are you pushing these two [1], [2] series?
>>> > 
>>> > Best
>>> > 
>>> > [1]
>>> >    * https://lore.kernel.org/tencent_A7FD9E9E75AA74A87583A427F5D20F988B09@qq.com
>>> >    * https://lore.kernel.org/tencent_C5E6023F97E7CC2A046AAEA09BC9ACF43907@qq.com
>>> >    * https://lore.kernel.org/cover.1726365007.git.wen.yang@linux.dev
>>> >    * https://lore.kernel.org/cover.1726910671.git.wen.yang@linux.dev
>>> > 
>>> > [2]
>>> >    * https://lore.kernel.org/20240714173858.52163-1-wen.yang@linux.dev
>>> >    * https://lore.kernel.org/20240715150223.54707-1-wen.yang@linux.dev
>>> >    * https://lore.kernel.org/20240905134818.4104-1-wen.yang@linux.dev
>>> >    * https://lore.kernel.org/20241201140058.5653-1-wen.yang@linux.dev
>>> > 
>>> 
>>> Thank you for your comments.
>>> 
>>> Here is a brief report about it.
>>> 
>>> The boundary check of sysctl uses .extra{1, 2} pointers, which need to point
>>> to constant variables (such as two_five_five, n_65535, ue_int_max, etc. that
>>> appear in multiple modules).
>>> 
>>> We first try to stuff these variables into the shared const arrays (
>>> (sysctl_vals, sysctl_long_vals) to save memory, as shown in series 1.
>>> 
>>> Thank Eric for pointing out:
>>> - You can still save a lot more by turning .extra1 and .extra2
>>> - into longs instead of keeping them as pointers and needing
>>> - constants to be pointed at somewhere.
>>> 
>>> We have further found more detailed information about "kill the shared const
>>> array":
>>> 
>>> https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org
>>> 
>>> 67ff32289 ("proc_sysctl: update docs for __register_sysctl_table()")
>>
>> Hey Wen Yang
>>
>> Thx for that additional explanation.
>>
>> So I understand that you want to get rid of the sysctl_vals shared const
>> array by replacing the extra{1,2} from void* to long; all this to *save
>> memory*. Right (please correct me if I have misunderstood you)?
>>
>> I don't think that by doing this replacement you will save much memory
>> (if any). And this is why:
>> 1. The long and the void* are most likely (depending on arch?) the same
>>    size.
>> 2. In [1] it is mentioned that, we would still need an extra (void*) to
>>    address the sysctl tables that are *NOT* using extra{1,2} as min max.
>>    This means that we need a bigger ctl_table (long extra1, long extra2
>>    and void* extra). We will need *more* memory?
>>
>> I would like to be proven wrong. So this is my proposal: Instead of
>> trying to do an incremental change, I suggest you remove the sysctl_vals
>> shared const array and measure how much memory you actually save. You
>> can use the ./scripts/bloat-o-meter in the linux kernel source and
>> follow something similar to what we did in [2] to measure how much
>> memory we are actually talking about.
>>
>> Once you get a hard number, then we can move forward on the memory
>> saving front.
>
> When I originally suggested this my motivation had nothing to do with
> memory.  The sysctl_vals memory array is type unsafe and has actively
> lead to real world bugs.  AKA longs and int confusion.  One example is
> that SYSCTL_ZERO does not properly work as a minimum to
> proc_do_ulongvec_minmax.
>
> Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
> scary to work with.
>
> So I suggested please making everything simpler by putting unsigned long
> min and max in to struct ctl_table and then getting rid of extra1 and
> extra2.  As extra1 and extra2 are almost exclusively used to implement
> min and max.
>
> The size would not change but the chance of making a hard to find and
> understand mistake would go down dramatically.

Oh.  I forgot to add it was also about the usability of the SYSCTL_XXX
macros.

As it is now whenever there is a new interesting value you have to add
another SYSCTL_XXX macro, and place your value in sysctl_vals.

Compared to just having a min or a max value in struct ctl_table it
is much more work.

Where size comes into my original suggestion is that because extra1 and
extra2 can and probably should go away, adding min and max members to
struct ctl_table can be done at no cost in size.

Eric
Re: Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 10 months, 3 weeks ago
On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
> "Eric W. Biederman" <ebiederm@xmission.com> writes:
> 
> > Joel Granados <joel.granados@kernel.org> writes:
> >
> >> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
> >>> 
> >>> 
> >>> On 2025/1/16 17:37, Joel Granados wrote:
> >>> > On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
> >>> > > do_proc_dointvec_conv() used the default range of int type, while
> >>> > > do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
> >>> > > struct do_proc_dointvec_minmax_conv_param, which are actually passed
> >>> > > in table->extra{1,2} pointers.
...
> >> (if any). And this is why:
> >> 1. The long and the void* are most likely (depending on arch?) the same
> >>    size.
> >> 2. In [1] it is mentioned that, we would still need an extra (void*) to
> >>    address the sysctl tables that are *NOT* using extra{1,2} as min max.
> >>    This means that we need a bigger ctl_table (long extra1, long extra2
> >>    and void* extra). We will need *more* memory?
> >>
> >> I would like to be proven wrong. So this is my proposal: Instead of
> >> trying to do an incremental change, I suggest you remove the sysctl_vals
> >> shared const array and measure how much memory you actually save. You
> >> can use the ./scripts/bloat-o-meter in the linux kernel source and
> >> follow something similar to what we did in [2] to measure how much
> >> memory we are actually talking about.
> >>
> >> Once you get a hard number, then we can move forward on the memory
> >> saving front.

Hey Eric.

Thx for the clarification. Much appreciated.
> >
> > When I originally suggested this my motivation had nothing to do with memory
That makes a *lot* of sense :).

> > The sysctl_vals memory array is type unsafe and has actively
Here I understand that they are unsafe because of Integer promotion
issues exacerbated by the void* variables extra{1,2}. Please correct me
If I missed the point.

There is also the fact that you can just do a `extra1 = &sysctl_vals[15]`
and the compiler will not bark at you. At least It let me do that on my
side.

> > lead to real world bugs. AKA longs and int confusion.  One example is
> > that SYSCTL_ZERO does not properly work as a minimum to
> > proc_do_ulongvec_minmax.
That is a great example.

> >
> > Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
> > scary to work with.
I share your feeling :)

> >
> > So I suggested please making everything simpler by putting unsigned long
> > min and max in to struct ctl_table and then getting rid of extra1 and
> > extra2.  As extra1 and extra2 are almost exclusively used to implement
> > min and max.
Explicitly specifying the type will help reduce the "unsefeness" but
with all the ways that there are of using these pointers, I think we
need to think bigger and maybe try to find a more typesafe way to
represent all the interactions.

It has always struck me as strange the arbitrariness of having 2 extra
pointers. Why not just one? At the end it is a pointer and can point to
a struct that holds min, max... I do not have the answer yet, but I
think what you propose here is part of a bigger refactoring needed in
ctl_table structure. Would like to hear your thought on it if you have
any.

> >
> > The size would not change but the chance of making a hard to find and
> > understand mistake would go down dramatically.
> 
> Oh.  I forgot to add it was also about the usability of the SYSCTL_XXX
> macros.
> 
> As it is now whenever there is a new interesting value you have to add
> another SYSCTL_XXX macro, and place your value in sysctl_vals.
> 
> Compared to just having a min or a max value in struct ctl_table it
> is much more work.
> 
> Where size comes into my original suggestion is that because extra1 and
> extra2 can and probably should go away, adding min and max members to
> struct ctl_table can be done at no cost in size.
> 
> Eric
> 

Best regards

-- 

Joel Granados
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Eric W. Biederman 10 months, 3 weeks ago
Joel Granados <joel.granados@kernel.org> writes:

> On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
>> "Eric W. Biederman" <ebiederm@xmission.com> writes:
>> 
>> > Joel Granados <joel.granados@kernel.org> writes:
>> >
>> >> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
>> >>> 
>> >>> 
>> >>> On 2025/1/16 17:37, Joel Granados wrote:
>> >>> > On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
>> >>> > > do_proc_dointvec_conv() used the default range of int type, while
>> >>> > > do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
>> >>> > > struct do_proc_dointvec_minmax_conv_param, which are actually passed
>> >>> > > in table->extra{1,2} pointers.
> ...
>> >> (if any). And this is why:
>> >> 1. The long and the void* are most likely (depending on arch?) the same
>> >>    size.
>> >> 2. In [1] it is mentioned that, we would still need an extra (void*) to
>> >>    address the sysctl tables that are *NOT* using extra{1,2} as min max.
>> >>    This means that we need a bigger ctl_table (long extra1, long extra2
>> >>    and void* extra). We will need *more* memory?
>> >>
>> >> I would like to be proven wrong. So this is my proposal: Instead of
>> >> trying to do an incremental change, I suggest you remove the sysctl_vals
>> >> shared const array and measure how much memory you actually save. You
>> >> can use the ./scripts/bloat-o-meter in the linux kernel source and
>> >> follow something similar to what we did in [2] to measure how much
>> >> memory we are actually talking about.
>> >>
>> >> Once you get a hard number, then we can move forward on the memory
>> >> saving front.
>
> Hey Eric.
>
> Thx for the clarification. Much appreciated.
>> >
>> > When I originally suggested this my motivation had nothing to do with memory
> That makes a *lot* of sense :).
>
>> > The sysctl_vals memory array is type unsafe and has actively
> Here I understand that they are unsafe because of Integer promotion
> issues exacerbated by the void* variables extra{1,2}. Please correct me
> If I missed the point.

Not precisely.  It is because the (void *) pointers are silently cast to
either (int *) or (long *) pointers.  So for example passing SYSCTL_ZERO
to proc_do_ulongvec_minmax results in reading sysctl_vals[0] and
sysctl_vals[1] and to get the long value.  Since sysctl_vals[1] is 1
a 0 is not accepted because 0 is below the minimum.

The minimum value that is accepted depends on which architecture you are
on.  On x86_64 and other little endian architectures the minimum value
accepted is 0x0000000100000000.  On big endian architectures like mips64
the minimum value accepted winds up being 0x0000000000000001.  Or do I
have that backwards? 

It doesn't matter because neither case is what the programmer expected.
Further it means that keeping the current proc_do_ulongvec_minmax and
proc_do_int_minmax methods that it is impossible to define any of the
SYSCTL_XXX macros except SYSCTL_ZERO that will work with both methods.

> There is also the fact that you can just do a `extra1 = &sysctl_vals[15]`
> and the compiler will not bark at you. At least It let me do that on my
> side.

All of which in the simplest for has me think the SYSCTL_XXX cleanups
were a step in the wrong direction.

>> > lead to real world bugs. AKA longs and int confusion.  One example is
>> > that SYSCTL_ZERO does not properly work as a minimum to
>> > proc_do_ulongvec_minmax.
> That is a great example.
>
>> >
>> > Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
>> > scary to work with.
> I share your feeling :)
>
>> >
>> > So I suggested please making everything simpler by putting unsigned long
>> > min and max in to struct ctl_table and then getting rid of extra1 and
>> > extra2.  As extra1 and extra2 are almost exclusively used to implement
>> > min and max.
> Explicitly specifying the type will help reduce the "unsefeness" but
> with all the ways that there are of using these pointers, I think we
> need to think bigger and maybe try to find a more typesafe way to
> represent all the interactions.
>
> It has always struck me as strange the arbitrariness of having 2 extra
> pointers. Why not just one?

Which would be the void *data pointer.

> At the end it is a pointer and can point to
> a struct that holds min, max... I do not have the answer yet, but I
> think what you propose here is part of a bigger refactoring needed in
> ctl_table structure. Would like to hear your thought on it if you have
> any.

One of the things that happens and that is worth acknowledging is there
is code that wraps proc_doulongvec_minmax and proc_dointvec_minmax.
Having the minmax information separate from the data pointer makes that
wrapping easier.

Further the min/max information is typically separate from other kinds
of data.  So even when not wrapped it is nice just to take a quick
glance and see what the minimums and maximums are.

My original suggest was that we change struct ctl_table from:

> /* A sysctl table is an array of struct ctl_table: */
> struct ctl_table {
> 	const char *procname;		/* Text ID for /proc/sys */
> 	void *data;
> 	int maxlen;
> 	umode_t mode;
> 	proc_handler *proc_handler;	/* Callback for text formatting */
> 	struct ctl_table_poll *poll;
> 	void *extra1;
> 	void *extra2;
> } __randomize_layout;

to:

> /* A sysctl table is an array of struct ctl_table: */
> struct ctl_table {
> 	const char *procname;		/* Text ID for /proc/sys */
> 	void *data;
> 	int maxlen;
> 	umode_t mode;
> 	proc_handler *proc_handler;	/* Callback for text formatting */
> 	struct ctl_table_poll *poll;
>       unsigned long min;
>       unsigned long max;
> } __randomize_layout;

That is just replace extra1 and extra2 with min and max members.  The
members don't have any reason to be pointers.  Without being pointers
the min/max functions can just use long values to cap either ints or
longs, and there is no room for error.  The integer promotion rules
will ensure that even negative values can be stored in unsigned long
min and max values successfully.  Plus it is all bog standard C
so there is nothing special to learn.

There are a bunch of fiddly little details to transition from where we
are today.  The most straightforward way I can see of making the
transition is to add the min and max members.  Come up with replacements
for proc_doulongvec_minmax and proc_dointvec_minmax that read the new
min and max members.  Update all of the users.  Update the few users
that use extra1 or extra2 for something besides min and max.  Then
remove extra1 and extra2.  At the end it is simpler and requires the
same or a little less space.

That was and remains my suggestion.

Eric
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 9 months, 3 weeks ago
On Mon, Jan 27, 2025 at 11:51:51AM -0600, Eric W. Biederman wrote:
> Joel Granados <joel.granados@kernel.org> writes:
> 
> > On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
> >> "Eric W. Biederman" <ebiederm@xmission.com> writes:
> >> 
> >> > Joel Granados <joel.granados@kernel.org> writes:
> >> >
> >> >> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
> >> >>> 
> >> >>> 
> >> >>> On 2025/1/16 17:37, Joel Granados wrote:
> >> >>> > On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
> >> >>> > > do_proc_dointvec_conv() used the default range of int type, while
> >> >>> > > do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
> >> >>> > > struct do_proc_dointvec_minmax_conv_param, which are actually passed
> >> >>> > > in table->extra{1,2} pointers.
> > ...
> >> >> (if any). And this is why:
> >> >> 1. The long and the void* are most likely (depending on arch?) the same
> >> >>    size.
> >> >> 2. In [1] it is mentioned that, we would still need an extra (void*) to
> >> >>    address the sysctl tables that are *NOT* using extra{1,2} as min max.
> >> >>    This means that we need a bigger ctl_table (long extra1, long extra2
> >> >>    and void* extra). We will need *more* memory?
> >> >>
> >> >> I would like to be proven wrong. So this is my proposal: Instead of
> >> >> trying to do an incremental change, I suggest you remove the sysctl_vals
> >> >> shared const array and measure how much memory you actually save. You
> >> >> can use the ./scripts/bloat-o-meter in the linux kernel source and
> >> >> follow something similar to what we did in [2] to measure how much
> >> >> memory we are actually talking about.
> >> >>
> >> >> Once you get a hard number, then we can move forward on the memory
> >> >> saving front.
> >
> > Hey Eric.
> >
> > Thx for the clarification. Much appreciated.
> >> >
> >> > When I originally suggested this my motivation had nothing to do with memory
> > That makes a *lot* of sense :).
> >
> >> > The sysctl_vals memory array is type unsafe and has actively
> > Here I understand that they are unsafe because of Integer promotion
> > issues exacerbated by the void* variables extra{1,2}. Please correct me
> > If I missed the point.
> 
> Not precisely.  It is because the (void *) pointers are silently cast to
> either (int *) or (long *) pointers.  So for example passing SYSCTL_ZERO
> to proc_do_ulongvec_minmax results in reading sysctl_vals[0] and
> sysctl_vals[1] and to get the long value.  Since sysctl_vals[1] is 1
> a 0 is not accepted because 0 is below the minimum.
> 
> The minimum value that is accepted depends on which architecture you are
> on.  On x86_64 and other little endian architectures the minimum value
> accepted is 0x0000000100000000.  On big endian architectures like mips64
> the minimum value accepted winds up being 0x0000000000000001.  Or do I
> have that backwards? 
> 
> It doesn't matter because neither case is what the programmer expected.
> Further it means that keeping the current proc_do_ulongvec_minmax and
> proc_do_int_minmax methods that it is impossible to define any of the
> SYSCTL_XXX macros except SYSCTL_ZERO that will work with both methods.
> 
> > There is also the fact that you can just do a `extra1 = &sysctl_vals[15]`
> > and the compiler will not bark at you. At least It let me do that on my
> > side.
> 
> All of which in the simplest for has me think the SYSCTL_XXX cleanups
> were a step in the wrong direction.
> 
> >> > lead to real world bugs. AKA longs and int confusion.  One example is
> >> > that SYSCTL_ZERO does not properly work as a minimum to
> >> > proc_do_ulongvec_minmax.
> > That is a great example.
> >
> >> >
> >> > Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
> >> > scary to work with.
> > I share your feeling :)
> >
> >> >
> >> > So I suggested please making everything simpler by putting unsigned long
> >> > min and max in to struct ctl_table and then getting rid of extra1 and
> >> > extra2.  As extra1 and extra2 are almost exclusively used to implement
> >> > min and max.
> > Explicitly specifying the type will help reduce the "unsefeness" but
> > with all the ways that there are of using these pointers, I think we
> > need to think bigger and maybe try to find a more typesafe way to
> > represent all the interactions.
> >
> > It has always struck me as strange the arbitrariness of having 2 extra
> > pointers. Why not just one?
> 
> Which would be the void *data pointer.
> 
> > At the end it is a pointer and can point to
> > a struct that holds min, max... I do not have the answer yet, but I
> > think what you propose here is part of a bigger refactoring needed in
> > ctl_table structure. Would like to hear your thought on it if you have
> > any.
> 
> One of the things that happens and that is worth acknowledging is there
> is code that wraps proc_doulongvec_minmax and proc_dointvec_minmax.
> Having the minmax information separate from the data pointer makes that
> wrapping easier.
> 
> Further the min/max information is typically separate from other kinds
> of data.  So even when not wrapped it is nice just to take a quick
> glance and see what the minimums and maximums are.
> 
> My original suggest was that we change struct ctl_table from:
> 
> > /* A sysctl table is an array of struct ctl_table: */
> > struct ctl_table {
> > 	const char *procname;		/* Text ID for /proc/sys */
> > 	void *data;
> > 	int maxlen;
> > 	umode_t mode;
> > 	proc_handler *proc_handler;	/* Callback for text formatting */
> > 	struct ctl_table_poll *poll;
> > 	void *extra1;
> > 	void *extra2;
> > } __randomize_layout;
> 
> to:
> 
> > /* A sysctl table is an array of struct ctl_table: */
> > struct ctl_table {
> > 	const char *procname;		/* Text ID for /proc/sys */
> > 	void *data;
> > 	int maxlen;
> > 	umode_t mode;
> > 	proc_handler *proc_handler;	/* Callback for text formatting */
> > 	struct ctl_table_poll *poll;
> >       unsigned long min;
> >       unsigned long max;
> > } __randomize_layout;
> 
> That is just replace extra1 and extra2 with min and max members.  The
> members don't have any reason to be pointers.  Without being pointers
> the min/max functions can just use long values to cap either ints or
> longs, and there is no room for error.  The integer promotion rules
> will ensure that even negative values can be stored in unsigned long
> min and max values successfully.  Plus it is all bog standard C
> so there is nothing special to learn.
> 
> There are a bunch of fiddly little details to transition from where we
> are today.  The most straightforward way I can see of making the
> transition is to add the min and max members.  Come up with replacements
> for proc_doulongvec_minmax and proc_dointvec_minmax that read the new
> min and max members.  Update all of the users.  Update the few users
> that use extra1 or extra2 for something besides min and max.  Then
> remove extra1 and extra2.  At the end it is simpler and requires the
> same or a little less space.
> 
> That was and remains my suggestion.
Thx for all this. Been putting this off for a month now, but will slowly
come back to it. I'll use your and Wen's series to try to come up with
something that look good to me.

Best

-- 

Joel Granados
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Wen Yang 9 months, 3 weeks ago

On 2025/2/27 22:07, Joel Granados wrote:
> On Mon, Jan 27, 2025 at 11:51:51AM -0600, Eric W. Biederman wrote:
>> Joel Granados <joel.granados@kernel.org> writes:
>>
>>> On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
>>>> "Eric W. Biederman" <ebiederm@xmission.com> writes:
>>>>
>>>>> Joel Granados <joel.granados@kernel.org> writes:
>>>>>
>>>>>> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 2025/1/16 17:37, Joel Granados wrote:
>>>>>>>> On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
>>>>>>>>> do_proc_dointvec_conv() used the default range of int type, while
>>>>>>>>> do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
>>>>>>>>> struct do_proc_dointvec_minmax_conv_param, which are actually passed
>>>>>>>>> in table->extra{1,2} pointers.
>>> ...
>>>>>> (if any). And this is why:
>>>>>> 1. The long and the void* are most likely (depending on arch?) the same
>>>>>>     size.
>>>>>> 2. In [1] it is mentioned that, we would still need an extra (void*) to
>>>>>>     address the sysctl tables that are *NOT* using extra{1,2} as min max.
>>>>>>     This means that we need a bigger ctl_table (long extra1, long extra2
>>>>>>     and void* extra). We will need *more* memory?
>>>>>>
>>>>>> I would like to be proven wrong. So this is my proposal: Instead of
>>>>>> trying to do an incremental change, I suggest you remove the sysctl_vals
>>>>>> shared const array and measure how much memory you actually save. You
>>>>>> can use the ./scripts/bloat-o-meter in the linux kernel source and
>>>>>> follow something similar to what we did in [2] to measure how much
>>>>>> memory we are actually talking about.
>>>>>>
>>>>>> Once you get a hard number, then we can move forward on the memory
>>>>>> saving front.
>>>
>>> Hey Eric.
>>>
>>> Thx for the clarification. Much appreciated.
>>>>>
>>>>> When I originally suggested this my motivation had nothing to do with memory
>>> That makes a *lot* of sense :).
>>>
>>>>> The sysctl_vals memory array is type unsafe and has actively
>>> Here I understand that they are unsafe because of Integer promotion
>>> issues exacerbated by the void* variables extra{1,2}. Please correct me
>>> If I missed the point.
>>
>> Not precisely.  It is because the (void *) pointers are silently cast to
>> either (int *) or (long *) pointers.  So for example passing SYSCTL_ZERO
>> to proc_do_ulongvec_minmax results in reading sysctl_vals[0] and
>> sysctl_vals[1] and to get the long value.  Since sysctl_vals[1] is 1
>> a 0 is not accepted because 0 is below the minimum.
>>
>> The minimum value that is accepted depends on which architecture you are
>> on.  On x86_64 and other little endian architectures the minimum value
>> accepted is 0x0000000100000000.  On big endian architectures like mips64
>> the minimum value accepted winds up being 0x0000000000000001.  Or do I
>> have that backwards?
>>
>> It doesn't matter because neither case is what the programmer expected.
>> Further it means that keeping the current proc_do_ulongvec_minmax and
>> proc_do_int_minmax methods that it is impossible to define any of the
>> SYSCTL_XXX macros except SYSCTL_ZERO that will work with both methods.
>>
>>> There is also the fact that you can just do a `extra1 = &sysctl_vals[15]`
>>> and the compiler will not bark at you. At least It let me do that on my
>>> side.
>>
>> All of which in the simplest for has me think the SYSCTL_XXX cleanups
>> were a step in the wrong direction.
>>
>>>>> lead to real world bugs. AKA longs and int confusion.  One example is
>>>>> that SYSCTL_ZERO does not properly work as a minimum to
>>>>> proc_do_ulongvec_minmax.
>>> That is a great example.
>>>
>>>>>
>>>>> Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
>>>>> scary to work with.
>>> I share your feeling :)
>>>
>>>>>
>>>>> So I suggested please making everything simpler by putting unsigned long
>>>>> min and max in to struct ctl_table and then getting rid of extra1 and
>>>>> extra2.  As extra1 and extra2 are almost exclusively used to implement
>>>>> min and max.
>>> Explicitly specifying the type will help reduce the "unsefeness" but
>>> with all the ways that there are of using these pointers, I think we
>>> need to think bigger and maybe try to find a more typesafe way to
>>> represent all the interactions.
>>>
>>> It has always struck me as strange the arbitrariness of having 2 extra
>>> pointers. Why not just one?
>>
>> Which would be the void *data pointer.
>>
>>> At the end it is a pointer and can point to
>>> a struct that holds min, max... I do not have the answer yet, but I
>>> think what you propose here is part of a bigger refactoring needed in
>>> ctl_table structure. Would like to hear your thought on it if you have
>>> any.
>>
>> One of the things that happens and that is worth acknowledging is there
>> is code that wraps proc_doulongvec_minmax and proc_dointvec_minmax.
>> Having the minmax information separate from the data pointer makes that
>> wrapping easier.
>>
>> Further the min/max information is typically separate from other kinds
>> of data.  So even when not wrapped it is nice just to take a quick
>> glance and see what the minimums and maximums are.
>>
>> My original suggest was that we change struct ctl_table from:
>>
>>> /* A sysctl table is an array of struct ctl_table: */
>>> struct ctl_table {
>>> 	const char *procname;		/* Text ID for /proc/sys */
>>> 	void *data;
>>> 	int maxlen;
>>> 	umode_t mode;
>>> 	proc_handler *proc_handler;	/* Callback for text formatting */
>>> 	struct ctl_table_poll *poll;
>>> 	void *extra1;
>>> 	void *extra2;
>>> } __randomize_layout;
>>
>> to:
>>
>>> /* A sysctl table is an array of struct ctl_table: */
>>> struct ctl_table {
>>> 	const char *procname;		/* Text ID for /proc/sys */
>>> 	void *data;
>>> 	int maxlen;
>>> 	umode_t mode;
>>> 	proc_handler *proc_handler;	/* Callback for text formatting */
>>> 	struct ctl_table_poll *poll;
>>>        unsigned long min;
>>>        unsigned long max;
>>> } __randomize_layout;
>>
>> That is just replace extra1 and extra2 with min and max members.  The
>> members don't have any reason to be pointers.  Without being pointers
>> the min/max functions can just use long values to cap either ints or
>> longs, and there is no room for error.  The integer promotion rules
>> will ensure that even negative values can be stored in unsigned long
>> min and max values successfully.  Plus it is all bog standard C
>> so there is nothing special to learn.
>>
>> There are a bunch of fiddly little details to transition from where we
>> are today.  The most straightforward way I can see of making the
>> transition is to add the min and max members.  Come up with replacements
>> for proc_doulongvec_minmax and proc_dointvec_minmax that read the new
>> min and max members.  Update all of the users.  Update the few users
>> that use extra1 or extra2 for something besides min and max.  Then
>> remove extra1 and extra2.  At the end it is simpler and requires the
>> same or a little less space.
>>
>> That was and remains my suggestion.
> Thx for all this. Been putting this off for a month now, but will slowly
> come back to it. I'll use your and Wen's series to try to come up with
> something that look good to me.
> 

Thanks.
The following is the latest series, which has been tested for about 20 days:

https://lore.kernel.org/all/cover.1739115369.git.wen.yang@linux.dev/

We look forward to your comments and suggestions.

--
Best wishes,
Wen
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Wen Yang 10 months, 2 weeks ago

On 2025/1/28 01:51, Eric W. Biederman wrote:
> Joel Granados <joel.granados@kernel.org> writes:
> 
>> On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
>>> "Eric W. Biederman" <ebiederm@xmission.com> writes:
>>>
>>>> Joel Granados <joel.granados@kernel.org> writes:
>>>>
>>>>> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
>>>>>>
>>>>>>
>>>>>> On 2025/1/16 17:37, Joel Granados wrote:
>>>>>>> On Sun, Jan 05, 2025 at 11:28:53PM +0800, Wen Yang wrote:
>>>>>>>> do_proc_dointvec_conv() used the default range of int type, while
>>>>>>>> do_proc_dointvec_minmax_conv() additionally used "int * {min, max}" of
>>>>>>>> struct do_proc_dointvec_minmax_conv_param, which are actually passed
>>>>>>>> in table->extra{1,2} pointers.
>> ...
>>>>> (if any). And this is why:
>>>>> 1. The long and the void* are most likely (depending on arch?) the same
>>>>>     size.
>>>>> 2. In [1] it is mentioned that, we would still need an extra (void*) to
>>>>>     address the sysctl tables that are *NOT* using extra{1,2} as min max.
>>>>>     This means that we need a bigger ctl_table (long extra1, long extra2
>>>>>     and void* extra). We will need *more* memory?
>>>>>
>>>>> I would like to be proven wrong. So this is my proposal: Instead of
>>>>> trying to do an incremental change, I suggest you remove the sysctl_vals
>>>>> shared const array and measure how much memory you actually save. You
>>>>> can use the ./scripts/bloat-o-meter in the linux kernel source and
>>>>> follow something similar to what we did in [2] to measure how much
>>>>> memory we are actually talking about.
>>>>>
>>>>> Once you get a hard number, then we can move forward on the memory
>>>>> saving front.
>>
>> Hey Eric.
>>
>> Thx for the clarification. Much appreciated.
>>>>
>>>> When I originally suggested this my motivation had nothing to do with memory
>> That makes a *lot* of sense :).
>>
>>>> The sysctl_vals memory array is type unsafe and has actively
>> Here I understand that they are unsafe because of Integer promotion
>> issues exacerbated by the void* variables extra{1,2}. Please correct me
>> If I missed the point.
> 
> Not precisely.  It is because the (void *) pointers are silently cast to
> either (int *) or (long *) pointers.  So for example passing SYSCTL_ZERO
> to proc_do_ulongvec_minmax results in reading sysctl_vals[0] and
> sysctl_vals[1] and to get the long value.  Since sysctl_vals[1] is 1
> a 0 is not accepted because 0 is below the minimum.
> 
> The minimum value that is accepted depends on which architecture you are
> on.  On x86_64 and other little endian architectures the minimum value
> accepted is 0x0000000100000000.  On big endian architectures like mips64
> the minimum value accepted winds up being 0x0000000000000001.  Or do I
> have that backwards?
> 
> It doesn't matter because neither case is what the programmer expected.
> Further it means that keeping the current proc_do_ulongvec_minmax and
> proc_do_int_minmax methods that it is impossible to define any of the
> SYSCTL_XXX macros except SYSCTL_ZERO that will work with both methods.
> 
>> There is also the fact that you can just do a `extra1 = &sysctl_vals[15]`
>> and the compiler will not bark at you. At least It let me do that on my
>> side.
> 
> All of which in the simplest for has me think the SYSCTL_XXX cleanups
> were a step in the wrong direction.
> 
>>>> lead to real world bugs. AKA longs and int confusion.  One example is
>>>> that SYSCTL_ZERO does not properly work as a minimum to
>>>> proc_do_ulongvec_minmax.
>> That is a great example.
>>
>>>>
>>>> Frankly those SYSCTL_XXX macros that use sysctl_vals are just plain
>>>> scary to work with.
>> I share your feeling :)
>>
>>>>
>>>> So I suggested please making everything simpler by putting unsigned long
>>>> min and max in to struct ctl_table and then getting rid of extra1 and
>>>> extra2.  As extra1 and extra2 are almost exclusively used to implement
>>>> min and max.
>> Explicitly specifying the type will help reduce the "unsefeness" but
>> with all the ways that there are of using these pointers, I think we
>> need to think bigger and maybe try to find a more typesafe way to
>> represent all the interactions.
>>
>> It has always struck me as strange the arbitrariness of having 2 extra
>> pointers. Why not just one?
> 
> Which would be the void *data pointer.
> 
>> At the end it is a pointer and can point to
>> a struct that holds min, max... I do not have the answer yet, but I
>> think what you propose here is part of a bigger refactoring needed in
>> ctl_table structure. Would like to hear your thought on it if you have
>> any.
> 
> One of the things that happens and that is worth acknowledging is there
> is code that wraps proc_doulongvec_minmax and proc_dointvec_minmax.
> Having the minmax information separate from the data pointer makes that
> wrapping easier.
> 
> Further the min/max information is typically separate from other kinds
> of data.  So even when not wrapped it is nice just to take a quick
> glance and see what the minimums and maximums are.
> 
> My original suggest was that we change struct ctl_table from:
> 
>> /* A sysctl table is an array of struct ctl_table: */
>> struct ctl_table {
>> 	const char *procname;		/* Text ID for /proc/sys */
>> 	void *data;
>> 	int maxlen;
>> 	umode_t mode;
>> 	proc_handler *proc_handler;	/* Callback for text formatting */
>> 	struct ctl_table_poll *poll;
>> 	void *extra1;
>> 	void *extra2;
>> } __randomize_layout;
> 
> to:
> 
>> /* A sysctl table is an array of struct ctl_table: */
>> struct ctl_table {
>> 	const char *procname;		/* Text ID for /proc/sys */
>> 	void *data;
>> 	int maxlen;
>> 	umode_t mode;
>> 	proc_handler *proc_handler;	/* Callback for text formatting */
>> 	struct ctl_table_poll *poll;
>>        unsigned long min;
>>        unsigned long max;
>> } __randomize_layout;
> 
> That is just replace extra1 and extra2 with min and max members.  The
> members don't have any reason to be pointers.  Without being pointers
> the min/max functions can just use long values to cap either ints or
> longs, and there is no room for error.  The integer promotion rules
> will ensure that even negative values can be stored in unsigned long
> min and max values successfully.  Plus it is all bog standard C
> so there is nothing special to learn.
> 
> There are a bunch of fiddly little details to transition from where we
> are today.  The most straightforward way I can see of making the
> transition is to add the min and max members.  Come up with replacements
> for proc_doulongvec_minmax and proc_dointvec_minmax that read the new
> min and max members.  Update all of the users.  Update the few users
> that use extra1 or extra2 for something besides min and max.  Then
> remove extra1 and extra2.  At the end it is simpler and requires the
> same or a little less space.
> 
> That was and remains my suggestion.
> 

Thanks for your valuable suggestions. We will continue to move forward 
along it and need your more guidance.

But there are also a few codes that do take the extra{1, 2} as pointers, 
for example:

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                           proc_handler *handler)
{
...
         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
                 t->neigh_vars[i].data += (long) p;
                 t->neigh_vars[i].extra1 = dev;
                 t->neigh_vars[i].extra2 = p;
         }
...
}

static void neigh_proc_update(const struct ctl_table *ctl, int write)
{
         struct net_device *dev = ctl->extra1;
         struct neigh_parms *p = ctl->extra2;
         struct net *net = neigh_parms_net(p);
         int index = (int *) ctl->data - p->data;
...
}


So could we modify it in this way to make it compatible with these two 
situations:

@@ -137,8 +137,16 @@ struct ctl_table {
         umode_t mode;
         proc_handler *proc_handler;     /* Callback for text formatting */
         struct ctl_table_poll *poll;
-       void *extra1;
-       void *extra2;
+       union {
+               struct {
+                       void *extra1;
+                       void *extra2;
+               };
+               struct {
+                       unsigned long min;
+                       unsigned long max;
+               };
+       };
  } __randomize_layout;


--
Best wishes,
Wen
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 9 months, 2 weeks ago
On Thu, Jan 30, 2025 at 10:32:14PM +0800, Wen Yang wrote:
> 
> 
> On 2025/1/28 01:51, Eric W. Biederman wrote:
> > Joel Granados <joel.granados@kernel.org> writes:
> > 
> > > On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
> > > > "Eric W. Biederman" <ebiederm@xmission.com> writes:
> > > > 
> > > > > Joel Granados <joel.granados@kernel.org> writes:
> > > > > 
> > > > > > On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
...
> > > struct ctl_table {
> > > 	const char *procname;		/* Text ID for /proc/sys */
> > > 	void *data;
> > > 	int maxlen;
> > > 	umode_t mode;
> > > 	proc_handler *proc_handler;	/* Callback for text formatting */
> > > 	struct ctl_table_poll *poll;
> > >        unsigned long min;
> > >        unsigned long max;
> > > } __randomize_layout;
> > 
> > That is just replace extra1 and extra2 with min and max members.  The
> > members don't have any reason to be pointers.  Without being pointers
> > the min/max functions can just use long values to cap either ints or
> > longs, and there is no room for error.  The integer promotion rules
> > will ensure that even negative values can be stored in unsigned long
> > min and max values successfully.  Plus it is all bog standard C
> > so there is nothing special to learn.
> > 
> > There are a bunch of fiddly little details to transition from where we
> > are today.  The most straightforward way I can see of making the
> > transition is to add the min and max members.  Come up with replacements
> > for proc_doulongvec_minmax and proc_dointvec_minmax that read the new
> > min and max members.  Update all of the users.  Update the few users
> > that use extra1 or extra2 for something besides min and max.  Then
> > remove extra1 and extra2.  At the end it is simpler and requires the
> > same or a little less space.
> > 
> > That was and remains my suggestion.
> > 
> 
> Thanks for your valuable suggestions. We will continue to move forward along
> it and need your more guidance.
> 
> But there are also a few codes that do take the extra{1, 2} as pointers, for
> example:
> 
> int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
>                           proc_handler *handler)
> {
> ...
>         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
>                 t->neigh_vars[i].data += (long) p;
>                 t->neigh_vars[i].extra1 = dev;
>                 t->neigh_vars[i].extra2 = p;
>         }
> ...
> }
> 
> static void neigh_proc_update(const struct ctl_table *ctl, int write)
> {
>         struct net_device *dev = ctl->extra1;
>         struct neigh_parms *p = ctl->extra2;
>         struct net *net = neigh_parms_net(p);
>         int index = (int *) ctl->data - p->data;
> ...
> }
Quick question: Do you have a systemic way of identifying these? Do you
have a grep or awk scripts somewhere? I'm actually very interested in
finding out what is the impact of this.

Thx

Best


> 
> 
> So could we modify it in this way to make it compatible with these two
> situations:
> 
> @@ -137,8 +137,16 @@ struct ctl_table {
>         umode_t mode;
>         proc_handler *proc_handler;     /* Callback for text formatting */
>         struct ctl_table_poll *poll;
> -       void *extra1;
> -       void *extra2;
> +       union {
> +               struct {
> +                       void *extra1;
> +                       void *extra2;
> +               };
> +               struct {
> +                       unsigned long min;
> +                       unsigned long max;
> +               };
> +       };
>  } __randomize_layout;
> 
> 
> --
> Best wishes,
> Wen
> 

-- 

Joel Granados
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Wen Yang 9 months, 2 weeks ago

On 2025/3/3 17:26, Joel Granados wrote:
> On Thu, Jan 30, 2025 at 10:32:14PM +0800, Wen Yang wrote:
>>
>>
>> On 2025/1/28 01:51, Eric W. Biederman wrote:
>>> Joel Granados <joel.granados@kernel.org> writes:
>>>
>>>> On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
>>>>> "Eric W. Biederman" <ebiederm@xmission.com> writes:
>>>>>
>>>>>> Joel Granados <joel.granados@kernel.org> writes:
>>>>>>
>>>>>>> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
> ...
>>>> struct ctl_table {
>>>> 	const char *procname;		/* Text ID for /proc/sys */
>>>> 	void *data;
>>>> 	int maxlen;
>>>> 	umode_t mode;
>>>> 	proc_handler *proc_handler;	/* Callback for text formatting */
>>>> 	struct ctl_table_poll *poll;
>>>>         unsigned long min;
>>>>         unsigned long max;
>>>> } __randomize_layout;
>>>
>>> That is just replace extra1 and extra2 with min and max members.  The
>>> members don't have any reason to be pointers.  Without being pointers
>>> the min/max functions can just use long values to cap either ints or
>>> longs, and there is no room for error.  The integer promotion rules
>>> will ensure that even negative values can be stored in unsigned long
>>> min and max values successfully.  Plus it is all bog standard C
>>> so there is nothing special to learn.
>>>
>>> There are a bunch of fiddly little details to transition from where we
>>> are today.  The most straightforward way I can see of making the
>>> transition is to add the min and max members.  Come up with replacements
>>> for proc_doulongvec_minmax and proc_dointvec_minmax that read the new
>>> min and max members.  Update all of the users.  Update the few users
>>> that use extra1 or extra2 for something besides min and max.  Then
>>> remove extra1 and extra2.  At the end it is simpler and requires the
>>> same or a little less space.
>>>
>>> That was and remains my suggestion.
>>>
>>
>> Thanks for your valuable suggestions. We will continue to move forward along
>> it and need your more guidance.
>>
>> But there are also a few codes that do take the extra{1, 2} as pointers, for
>> example:
>>
>> int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
>>                            proc_handler *handler)
>> {
>> ...
>>          for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
>>                  t->neigh_vars[i].data += (long) p;
>>                  t->neigh_vars[i].extra1 = dev;
>>                  t->neigh_vars[i].extra2 = p;
>>          }
>> ...
>> }
>>
>> static void neigh_proc_update(const struct ctl_table *ctl, int write)
>> {
>>          struct net_device *dev = ctl->extra1;
>>          struct neigh_parms *p = ctl->extra2;
>>          struct net *net = neigh_parms_net(p);
>>          int index = (int *) ctl->data - p->data;
>> ...
>> }
> Quick question: Do you have a systemic way of identifying these? Do you
> have a grep or awk scripts somewhere? I'm actually very interested in
> finding out what is the impact of this.
> 

Thanks, we may use the following simple scripts:

- the extra {1,2} as pointers to some objects:
$ grep "\.extra1\|\.extra2" * -R | grep -v "SYSCTL_" | grep -v "\&"

- the extra {1,2} as pointers to elements in the shared constant array:
$ grep "\.extra1\|\.extra2" * -R | grep "SYSCTL_"

- the extra {1,2} as pointers to additional constant variables:
$ grep "\.extra1\|\.extra2" * -R | grep "\&"


--
Best wishes,
Wen


> 
> 
>>
>>
>> So could we modify it in this way to make it compatible with these two
>> situations:
>>
>> @@ -137,8 +137,16 @@ struct ctl_table {
>>          umode_t mode;
>>          proc_handler *proc_handler;     /* Callback for text formatting */
>>          struct ctl_table_poll *poll;
>> -       void *extra1;
>> -       void *extra2;
>> +       union {
>> +               struct {
>> +                       void *extra1;
>> +                       void *extra2;
>> +               };
>> +               struct {
>> +                       unsigned long min;
>> +                       unsigned long max;
>> +               };
>> +       };
>>   } __randomize_layout;
>>
>>
>> --
>> Best wishes,
>> Wen
>>
>
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 9 months, 1 week ago
On Thu, Mar 06, 2025 at 09:33:39PM +0800, Wen Yang wrote:
> 
> 
> On 2025/3/3 17:26, Joel Granados wrote:
> > On Thu, Jan 30, 2025 at 10:32:14PM +0800, Wen Yang wrote:
> > > 
> > > 
> > > On 2025/1/28 01:51, Eric W. Biederman wrote:
> > > > Joel Granados <joel.granados@kernel.org> writes:
> > > > 
> > > > > On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
> > > > > > "Eric W. Biederman" <ebiederm@xmission.com> writes:
> > > > > > 
> > > > > > > Joel Granados <joel.granados@kernel.org> writes:
> > > > > > > 
> > > > > > > > On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
> > ...
...
> > Quick question: Do you have a systemic way of identifying these? Do you
> > have a grep or awk scripts somewhere? I'm actually very interested in
> > finding out what is the impact of this.
> > 
> 
> Thanks, we may use the following simple scripts:
> 
> - the extra {1,2} as pointers to some objects:
> $ grep "\.extra1\|\.extra2" * -R | grep -v "SYSCTL_" | grep -v "\&"
This is actually pretty nice. Thx for that. I executed it a bit
differently: 

$  git grep "\.extra1\|\.extra2" | grep -v "SYSCTL_" | grep -v "&"

I also went and did something way more complicated :). I created an
smatch check [1] and ran it on a allyes config. This gave me all of your
results except the openat2 selftests. This might be something to
consider for when this is finished to add a check so that ppl don't just
add an int or a long to a extra

Best

[1]: https://github.com/Joelgranados/smatch/tree/jag/extra_ptr



> 
> - the extra {1,2} as pointers to elements in the shared constant array:
> $ grep "\.extra1\|\.extra2" * -R | grep "SYSCTL_"
> 
> - the extra {1,2} as pointers to additional constant variables:
> $ grep "\.extra1\|\.extra2" * -R | grep "\&"
> 
> 
> --
> Best wishes,
> Wen
> 
> 
> > 
> > 
> > > 
> > > 
> > > So could we modify it in this way to make it compatible with these two
> > > situations:
> > > 
> > > @@ -137,8 +137,16 @@ struct ctl_table {
> > >          umode_t mode;
> > >          proc_handler *proc_handler;     /* Callback for text formatting */
> > >          struct ctl_table_poll *poll;
> > > -       void *extra1;
> > > -       void *extra2;
> > > +       union {
> > > +               struct {
> > > +                       void *extra1;
> > > +                       void *extra2;
> > > +               };
> > > +               struct {
> > > +                       unsigned long min;
> > > +                       unsigned long max;
> > > +               };
> > > +       };
> > >   } __randomize_layout;
> > > 
> > > 
> > > --
> > > Best wishes,
> > > Wen
> > > 
> > 

-- 

Joel Granados
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Wen Yang 9 months, 1 week ago

On 2025/3/10 22:25, Joel Granados wrote:
> On Thu, Mar 06, 2025 at 09:33:39PM +0800, Wen Yang wrote:
>>
>>
>> On 2025/3/3 17:26, Joel Granados wrote:
>>> On Thu, Jan 30, 2025 at 10:32:14PM +0800, Wen Yang wrote:
>>>>
>>>>
>>>> On 2025/1/28 01:51, Eric W. Biederman wrote:
>>>>> Joel Granados <joel.granados@kernel.org> writes:
>>>>>
>>>>>> On Thu, Jan 23, 2025 at 12:30:25PM -0600, Eric W. Biederman wrote:
>>>>>>> "Eric W. Biederman" <ebiederm@xmission.com> writes:
>>>>>>>
>>>>>>>> Joel Granados <joel.granados@kernel.org> writes:
>>>>>>>>
>>>>>>>>> On Sun, Jan 19, 2025 at 10:59:21PM +0800, Wen Yang wrote:
>>> ...
> ...
>>> Quick question: Do you have a systemic way of identifying these? Do you
>>> have a grep or awk scripts somewhere? I'm actually very interested in
>>> finding out what is the impact of this.
>>>
>>
>> Thanks, we may use the following simple scripts:
>>
>> - the extra {1,2} as pointers to some objects:
>> $ grep "\.extra1\|\.extra2" * -R | grep -v "SYSCTL_" | grep -v "\&"
> This is actually pretty nice. Thx for that. I executed it a bit
> differently:
> 
> $  git grep "\.extra1\|\.extra2" | grep -v "SYSCTL_" | grep -v "&"
> 
> I also went and did something way more complicated :). I created an
> smatch check [1] and ran it on a allyes config. This gave me all of your
> results except the openat2 selftests. This might be something to
> consider for when this is finished to add a check so that ppl don't just
> add an int or a long to a extra
> 
> Best
> 
> [1]: https://github.com/Joelgranados/smatch/tree/jag/extra_ptr
> 

Thanks.
This is an excellent approach!
We will also learn to apply it in our code.

--
Best wishes,
Wen

> 
>>
>> - the extra {1,2} as pointers to elements in the shared constant array:
>> $ grep "\.extra1\|\.extra2" * -R | grep "SYSCTL_"
>>
>> - the extra {1,2} as pointers to additional constant variables:
>> $ grep "\.extra1\|\.extra2" * -R | grep "\&"
>>
>>
>> --
>> Best wishes,
>> Wen
>>
>>
>>>
>>>
>>>>
>>>>
>>>> So could we modify it in this way to make it compatible with these two
>>>> situations:
>>>>
>>>> @@ -137,8 +137,16 @@ struct ctl_table {
>>>>           umode_t mode;
>>>>           proc_handler *proc_handler;     /* Callback for text formatting */
>>>>           struct ctl_table_poll *poll;
>>>> -       void *extra1;
>>>> -       void *extra2;
>>>> +       union {
>>>> +               struct {
>>>> +                       void *extra1;
>>>> +                       void *extra2;
>>>> +               };
>>>> +               struct {
>>>> +                       unsigned long min;
>>>> +                       unsigned long max;
>>>> +               };
>>>> +       };
>>>>    } __randomize_layout;
>>>>
>>>>
>>>> --
>>>> Best wishes,
>>>> Wen
>>>>
>>>
>
Re: [PATCH v5] sysctl: simplify the min/max boundary check
Posted by Joel Granados 9 months, 3 weeks ago
On Thu, Jan 30, 2025 at 10:32:14PM +0800, Wen Yang wrote:
> 
> 
> On 2025/1/28 01:51, Eric W. Biederman wrote:
> > Joel Granados <joel.granados@kernel.org> writes:
> > 
...
> > that use extra1 or extra2 for something besides min and max.  Then
> > remove extra1 and extra2.  At the end it is simpler and requires the
> > same or a little less space.
> > 
> > That was and remains my suggestion.
> > 
> 
> Thanks for your valuable suggestions. We will continue to move forward along
> it and need your more guidance.
> 
> But there are also a few codes that do take the extra{1, 2} as pointers, for
> example:
> 
> int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
>                           proc_handler *handler)
> {
> ...
>         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
>                 t->neigh_vars[i].data += (long) p;
>                 t->neigh_vars[i].extra1 = dev;
>                 t->neigh_vars[i].extra2 = p;
>         }
> ...
> }
> 
> static void neigh_proc_update(const struct ctl_table *ctl, int write)
> {
>         struct net_device *dev = ctl->extra1;
>         struct neigh_parms *p = ctl->extra2;
>         struct net *net = neigh_parms_net(p);
>         int index = (int *) ctl->data - p->data;
> ...
> }
> 
> 
> So could we modify it in this way to make it compatible with these two
> situations:
> 
> @@ -137,8 +137,16 @@ struct ctl_table {
>         umode_t mode;
>         proc_handler *proc_handler;     /* Callback for text formatting */
>         struct ctl_table_poll *poll;
> -       void *extra1;
> -       void *extra2;
> +       union {
> +               struct {
> +                       void *extra1;
> +                       void *extra2;
> +               };
> +               struct {
> +                       unsigned long min;
> +                       unsigned long max;
> +               };
> +       };
>  } __randomize_layout;

I'm still not convinced that a union is the best way out of this. I have
postponed reviewing this for several weeks, but I'm slowly coming back
to it.

Thx for your suggestion

Best


-- 

Joel Granados