[v4] livepatch: Add using attribute to klp_func for using function

[PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Wardenjohn 1 year, 5 months ago

One system may contains more than one livepatch module. We can see
which patch is enabled. If some patches applied to one system
modifing the same function, livepatch will use the function enabled
on top of the function stack. However, we can not excatly know
which function of which patch is now enabling.

This patch introduce one sysfs attribute of "using" to klp_func.
For example, if there are serval patches  make changes to function
"meminfo_proc_show", the attribute "enabled" of all the patch is 1.
With this attribute, we can easily know the version enabling belongs
to which patch.

The "using" is set as three state. 0 is disabled, it means that this
version of function is not used. 1 is running, it means that this
version of function is now running. -1 is unknown, it means that
this version of function is under transition, some task is still
chaning their running version of this function.

cat /sys/kernel/livepatch/<patch1>/<object1>/<function1,sympos>/using -> 0
means that the function1 of patch1 is disabled.

cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> 1
means that the function1 of patchN is enabled.

cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> -1
means that the function1 of patchN is under transition and unknown.

Signed-off-by: Wardenjohn <zhangwarden@gmail.com>

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index d874aecc817b..5a6bacebd66f 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -57,6 +57,7 @@ struct klp_ops {
  * @nop:        temporary patch to use the original code again; dyn. allocated
  * @patched:	the func has been added to the klp_ops list
  * @transition:	the func is currently being applied or reverted
+ * @using:      the func is on top of the function stack that is using
  *
  * The patched and transition variables define the func's patching state.  When
  * patching, a func is always in one of the following states:
@@ -72,6 +73,12 @@ struct klp_ops {
  *   patched=1 transition=1: patched, may be visible to some tasks
  *   patched=0 transition=1: unpatched, temporary ending state
  *   patched=0 transition=0: unpatched
+ *
+ * 'using' flag is used to show if this function is now using
+ *
+ *   using=-1 (unknown): the function is now under transition
+ *   using=1  (using):   the function is now running
+ *   using=0  (not used): the function is not used
  */
 struct klp_func {
 	/* external */
@@ -96,6 +103,7 @@ struct klp_func {
 	bool nop;
 	bool patched;
 	bool transition;
+	int using;
 };
 
 struct klp_object;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index e4572bf34316..bc1b2085e3c5 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -349,6 +349,7 @@ int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
  * /sys/kernel/livepatch/<patch>/<object>
  * /sys/kernel/livepatch/<patch>/<object>/patched
  * /sys/kernel/livepatch/<patch>/<object>/<function,sympos>
+ * /sys/kernel/livepatch/<patch>/<object>/<function,sympos>/using
  */
 static int __klp_disable_patch(struct klp_patch *patch);
 
@@ -470,6 +471,22 @@ static struct attribute *klp_object_attrs[] = {
 };
 ATTRIBUTE_GROUPS(klp_object);
 
+static ssize_t using_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct klp_func *func;
+
+	func = container_of(kobj, struct klp_func, kobj);
+	return sysfs_emit(buf, "%d\n", func->using);
+}
+
+static struct kobj_attribute using_kobj_attr = __ATTR_RO(using);
+static struct attribute *klp_func_attrs[] = {
+	&using_kobj_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(klp_func);
+
 static void klp_free_object_dynamic(struct klp_object *obj)
 {
 	kfree(obj->name);
@@ -631,6 +648,7 @@ static void klp_kobj_release_func(struct kobject *kobj)
 static const struct kobj_type klp_ktype_func = {
 	.release = klp_kobj_release_func,
 	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = klp_func_groups,
 };
 
 static void __klp_free_funcs(struct klp_object *obj, bool nops_only)
@@ -775,6 +793,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
 	INIT_LIST_HEAD(&func->stack_node);
 	func->patched = false;
 	func->transition = false;
+	func->using = 0;
 
 	/* The format for the sysfs directory is <function,sympos> where sympos
 	 * is the nth occurrence of this symbol in kallsyms for the patched
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 8ab9c35570f4..5138cedfcfaa 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -134,6 +134,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 static void klp_unpatch_func(struct klp_func *func)
 {
 	struct klp_ops *ops;
+	struct klp_func *stack_top_func;
 
 	if (WARN_ON(!func->patched))
 		return;
@@ -160,6 +161,10 @@ static void klp_unpatch_func(struct klp_func *func)
 		kfree(ops);
 	} else {
 		list_del_rcu(&func->stack_node);
+		// the previous function is deleted, the stack top is under transition
+		stack_top_func = list_first_entry(&ops->func_stack, struct klp_func,
+							stack_node);
+		stack_top_func->using = -1;
 	}
 
 	func->patched = false;
@@ -168,6 +173,7 @@ static void klp_unpatch_func(struct klp_func *func)
 static int klp_patch_func(struct klp_func *func)
 {
 	struct klp_ops *ops;
+	struct klp_func *stack_top_func;
 	int ret;
 
 	if (WARN_ON(!func->old_func))
@@ -219,10 +225,16 @@ static int klp_patch_func(struct klp_func *func)
 
 		func->ops = ops;
 	} else {
+		// stack_top_func is going to be in transition
+		stack_top_func = list_first_entry(&ops->func_stack, struct klp_func,
+							stack_node);
+		stack_top_func->using = -1;
+		// The new patched function is the one enabling
 		list_add_rcu(&func->stack_node, &ops->func_stack);
 	}
 
 	func->patched = true;
+	func->using = -1;
 
 	return 0;
 
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index d9a3f9c7a93b..365dac635efe 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -90,8 +90,9 @@ static void klp_synchronize_transition(void)
 static void klp_complete_transition(void)
 {
 	struct klp_object *obj;
-	struct klp_func *func;
+	struct klp_func *func, *next_func, *stack_top_func;
 	struct task_struct *g, *task;
+	struct klp_ops *ops;
 	unsigned int cpu;
 
 	pr_debug("'%s': completing %s transition\n",
@@ -119,9 +120,39 @@ static void klp_complete_transition(void)
 		klp_synchronize_transition();
 	}
 
-	klp_for_each_object(klp_transition_patch, obj)
-		klp_for_each_func(obj, func)
-			func->transition = false;
+	/*
+	 * The transition patch is finished. The stack top function is now truly
+	 * running. The previous function should be set as 0 as none task is
+	 * using this function anymore.
+	 *
+	 * If this is a patching patch, all function is using.
+	 * if this patch is unpatching, all function of the func stack top is using
+	 */
+	if (klp_target_state == KLP_TRANSITION_PATCHED) {
+		klp_for_each_object(klp_transition_patch, obj) {
+			klp_for_each_func(obj, func) {
+				func->using = 1;
+				func->transition = false;
+				next_func = list_entry_rcu(func->stack_node.next,
+								struct klp_func, stack_node);
+				if (&func->stack_node != &func->ops->func_stack)
+					next_func->using = 0;
+			}
+		}
+	} else {
+		// for the unpatch func, if ops exist, the top of this func is using
+		klp_for_each_object(klp_transition_patch, obj) {
+			klp_for_each_func(obj, func) {
+				func->transition = false;
+				ops = func->ops;
+				if (ops) {
+					stack_top_func = list_first_entry(&ops->func_stack,
+							struct klp_func, stack_node);
+					stack_top_func->using = 1;
+				}
+			}
+		}
+	}
 
 	/* Prevent klp_ftrace_handler() from seeing KLP_TRANSITION_IDLE state */
 	if (klp_target_state == KLP_TRANSITION_PATCHED)
-- 
2.18.2

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Miroslav Benes 1 year, 5 months ago

Hi,

On Wed, 28 Aug 2024, Wardenjohn wrote:

> One system may contains more than one livepatch module. We can see
> which patch is enabled. If some patches applied to one system
> modifing the same function, livepatch will use the function enabled
> on top of the function stack. However, we can not excatly know
> which function of which patch is now enabling.
> 
> This patch introduce one sysfs attribute of "using" to klp_func.
> For example, if there are serval patches  make changes to function
> "meminfo_proc_show", the attribute "enabled" of all the patch is 1.
> With this attribute, we can easily know the version enabling belongs
> to which patch.
> 
> The "using" is set as three state. 0 is disabled, it means that this
> version of function is not used. 1 is running, it means that this
> version of function is now running. -1 is unknown, it means that
> this version of function is under transition, some task is still
> chaning their running version of this function.
> 
> cat /sys/kernel/livepatch/<patch1>/<object1>/<function1,sympos>/using -> 0
> means that the function1 of patch1 is disabled.
> 
> cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> 1
> means that the function1 of patchN is enabled.
> 
> cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> -1
> means that the function1 of patchN is under transition and unknown.
> 
> Signed-off-by: Wardenjohn <zhangwarden@gmail.com>

I am not a fan. Josh wrote most of my objections already so I will not 
repeat them. I understand that the attribute might be useful but the 
amount of code it adds to sensitive functions like 
klp_complete_transition() is no fun.

Would it be possible to just use klp_transition_patch and implement the 
logic just in using_show()? I have not thought through it completely but 
klp_transition_patch is also an indicator that there is a transition going 
on. It is set to NULL only after all func->transition are false. So if you 
check that, you can assign -1 in using_show() immediately and then just 
look at the top of func_stack.

If possible (and there are corner cases everywhere. Just take a look at 
barriers in all those functions.) and the resulting code is much simpler, 
we might take it. But otherwise this should really be solved in userspace 
using some live patch management tool as Josh said. I mean generally 
because you have much more serious problems without it.

Regards,
Miroslav

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Petr Mladek 1 year, 5 months ago

On Thu 2024-09-05 12:23:20, Miroslav Benes wrote:
> Hi,
> 
> On Wed, 28 Aug 2024, Wardenjohn wrote:
> 
> > One system may contains more than one livepatch module. We can see
> > which patch is enabled. If some patches applied to one system
> > modifing the same function, livepatch will use the function enabled
> > on top of the function stack. However, we can not excatly know
> > which function of which patch is now enabling.
> > 
> > This patch introduce one sysfs attribute of "using" to klp_func.
> > For example, if there are serval patches  make changes to function
> > "meminfo_proc_show", the attribute "enabled" of all the patch is 1.
> > With this attribute, we can easily know the version enabling belongs
> > to which patch.
> > 
> > The "using" is set as three state. 0 is disabled, it means that this
> > version of function is not used. 1 is running, it means that this
> > version of function is now running. -1 is unknown, it means that
> > this version of function is under transition, some task is still
> > chaning their running version of this function.
> > 
> > cat /sys/kernel/livepatch/<patch1>/<object1>/<function1,sympos>/using -> 0
> > means that the function1 of patch1 is disabled.
> > 
> > cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> 1
> > means that the function1 of patchN is enabled.
> > 
> > cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> -1
> > means that the function1 of patchN is under transition and unknown.
> > 
> > Signed-off-by: Wardenjohn <zhangwarden@gmail.com>
> 
> I am not a fan. Josh wrote most of my objections already so I will not 
> repeat them. I understand that the attribute might be useful but the 
> amount of code it adds to sensitive functions like 
> klp_complete_transition() is no fun.
> 
> Would it be possible to just use klp_transition_patch and implement the 
> logic just in using_show()? I have not thought through it completely but 
> klp_transition_patch is also an indicator that there is a transition going 
> on. It is set to NULL only after all func->transition are false. So if you 
> check that, you can assign -1 in using_show() immediately and then just 
> look at the top of func_stack.

The 1st patch adds the pointer to struct klp_ops into struct
klp_func. We might check the state a similar way as klp_ftrace_handler().

I had something like this in mind when I suggested to move the pointer:

static ssize_t using_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	struct klp_func *func, *using_func;
	struct klp_ops *ops;
	int using;

	func = container_of(kobj, struct klp_func, kobj);

	rcu_read_lock();

	if (func->transition) {
		using = -1;
		goto out;
	}

	# FIXME: This requires releasing struct klp_ops via call_rcu()
	ops = func->ops;
	if (!ops) {
		using = 0;
		goto out;
	}

	using_func = list_first_or_null_rcu(&ops->func_stack,
					struct klp_func, stack_node);
	if (func == using_func)
		using = 1;
	else
		using = 0;

out:
	rcu_read_unlock();

	return sysfs_emit(buf, "%d\n", func->using);
}

It is racy and tricky. We probably should add some memory barriers.
And maybe even the ordering of reads should be different.

We could not take klp_mutex because it might cause a deadlock when
the sysfs file gets removed. kobject_put(&func->kobj) is called
by __klp_free_funcs() under klp_mutex.

It would be easier if we could take klp_mutex. But it would require
decrementing the kobject refcout without of klp_mutex. It might
be complicated.

I am afraid that this approach is not worth the effort and
is is not the way to go.

Best Regards,
Petr

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

Hi, Petr
> 
> The 1st patch adds the pointer to struct klp_ops into struct
> klp_func. We might check the state a similar way as klp_ftrace_handler().
> 
> I had something like this in mind when I suggested to move the pointer:
> 
> static ssize_t using_show(struct kobject *kobj,
> struct kobj_attribute *attr, char *buf)
> {
> struct klp_func *func, *using_func;
> struct klp_ops *ops;
> int using;
> 
> func = container_of(kobj, struct klp_func, kobj);
> 
> rcu_read_lock();
> 
> if (func->transition) {
> using = -1;
> goto out;
> }
> 
> # FIXME: This requires releasing struct klp_ops via call_rcu()
> ops = func->ops;
> if (!ops) {
> using = 0;
> goto out;
> }
> 
> using_func = list_first_or_null_rcu(&ops->func_stack,
> struct klp_func, stack_node);
> if (func == using_func)
> using = 1;
> else
> using = 0;
> 
> out:
> rcu_read_unlock();
> 
> return sysfs_emit(buf, "%d\n", func->using);
> }

Bravo, I also have something like this in mind when Miroslav suggested to implement the logic in using_show. 

> 
> It is racy and tricky. We probably should add some memory barriers.
> And maybe even the ordering of reads should be different.
> 
> We could not take klp_mutex because it might cause a deadlock when
> the sysfs file gets removed. kobject_put(&func->kobj) is called
> by __klp_free_funcs() under klp_mutex.
> 
> It would be easier if we could take klp_mutex. But it would require
> decrementing the kobject refcout without of klp_mutex. It might
> be complicated.
> 
> I am afraid that this approach is not worth the effort and
> is is not the way to go.
> 

But I don't think I've thought as deeply as you do and may not have considered the possible risks. Therefore, I do need your help to make my patch perfect.  ^_^

Thank you.
Wardenjohn.

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Petr Mladek 1 year, 5 months ago

On Sun 2024-09-08 10:51:14, zhang warden wrote:
> 
> Hi, Petr
> > 
> > The 1st patch adds the pointer to struct klp_ops into struct
> > klp_func. We might check the state a similar way as klp_ftrace_handler().
> > 
> > I had something like this in mind when I suggested to move the pointer:
> > 
> > static ssize_t using_show(struct kobject *kobj,
> > struct kobj_attribute *attr, char *buf)
> > {
> > struct klp_func *func, *using_func;
> > struct klp_ops *ops;
> > int using;
> > 
> > func = container_of(kobj, struct klp_func, kobj);
> > 
> > rcu_read_lock();
> > 
> > if (func->transition) {
> > using = -1;
> > goto out;
> > }
> > 
> > # FIXME: This requires releasing struct klp_ops via call_rcu()

This would require adding "struct rcu_head" into "struct klp_ops",
like:

struct klp_ops {
	struct list_head func_stack;
	struct ftrace_ops fops;
	struct rcu_head rcu;
};

and then freeing the structure using kfree_rcu():

diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 90408500e5a3..f096dd9390d2 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -149,7 +149,7 @@ static void klp_unpatch_func(struct klp_func *func)
 
 		list_del_rcu(&func->stack_node);
 		list_del(&ops->node);
-		kfree(ops);
+		kfree_rcu(ops, rcu);
 	} else {
 		list_del_rcu(&func->stack_node);
 	}
@@ -223,7 +223,7 @@ static int klp_patch_func(struct klp_func *func)
 err:
 	list_del_rcu(&func->stack_node);
 	list_del(&ops->node);
-	kfree(ops);
+	kfree_rcu(ops, rcu);
 	return ret;
 }

With this the function should be safe against accessing an invalid
pointer.

> > ops = func->ops;
> > if (!ops) {
> > using = 0;
> > goto out;
> > }
> > 
> > using_func = list_first_or_null_rcu(&ops->func_stack,
> > struct klp_func, stack_node);
> > if (func == using_func)
> > using = 1;
> > else
> > using = 0;
> > 
> > out:
> > rcu_read_unlock();
> > 
> > return sysfs_emit(buf, "%d\n", func->using);
> > }

But the function is still not correct according the order of reading.
A more correct solution would be something like:

static ssize_t using_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	struct klp_func *func, *using_func;
	struct klp_ops *ops;
	int using;

	func = container_of(kobj, struct klp_func, kobj);

	rcu_read_lock();

	/* This livepatch is used when the function is on top of the stack. */
	ops = func->ops;
	if (ops) {
		using_func = list_first_or_null_rcu(&ops->func_stack,
						struct klp_func, stack_node);
		if (func == using_func)
			using = 1;
		else
			using = 0;
	}

	/*
	 * The function stack gives the right information only when there
	 * is no transition in progress.
	 *
	 * Make sure that we see the updated ops->func_stack when
	 * func->transition is cleared. This matches with:
	 *
	 * The write barrier in  __klp_enable_patch() between
	 * klp_init_transition() and klp_patch_object().
	 *
	 * The write barrier in  __klp_disable_patch() between
	 * klp_init_transition() and klp_start_transition().
	 *
	 * The write barrier in klp_complete_transition()
	 * between klp_unpatch_objects() and func->transition = false.
	 */
	smp_rmb();

	if (func->transition)
		using = -1;

	rcu_read_unlock();

	return sysfs_emit(buf, "%d\n", func->using);
}

Now, the question is whether we want to maintain such a barrier. Any
lockless access and barrier adds a maintenance burden.

You might try to put the above into a patch see what others tell
about it.

Best Regards,
Petr

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

Hi, Petr

> On Sep 10, 2024, at 16:01, Petr Mladek <pmladek@suse.com> wrote:
> 
> On Sun 2024-09-08 10:51:14, zhang warden wrote:
>> 
>> Hi, Petr
>>> 
>>> The 1st patch adds the pointer to struct klp_ops into struct
>>> klp_func. We might check the state a similar way as klp_ftrace_handler().
>>> 
>>> I had something like this in mind when I suggested to move the pointer:
>>> 
>>> static ssize_t using_show(struct kobject *kobj,
>>> struct kobj_attribute *attr, char *buf)
>>> {
>>> struct klp_func *func, *using_func;
>>> struct klp_ops *ops;
>>> int using;
>>> 
>>> func = container_of(kobj, struct klp_func, kobj);
>>> 
>>> rcu_read_lock();
>>> 
>>> if (func->transition) {
>>> using = -1;
>>> goto out;
>>> }
>>> 
>>> # FIXME: This requires releasing struct klp_ops via call_rcu()
> 
> This would require adding "struct rcu_head" into "struct klp_ops",
> like:
> 
> struct klp_ops {
> struct list_head func_stack;
> struct ftrace_ops fops;
> struct rcu_head rcu;
> };
> 
> and then freeing the structure using kfree_rcu():
> 
> diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
> index 90408500e5a3..f096dd9390d2 100644
> --- a/kernel/livepatch/patch.c
> +++ b/kernel/livepatch/patch.c
> @@ -149,7 +149,7 @@ static void klp_unpatch_func(struct klp_func *func)
> 
> list_del_rcu(&func->stack_node);
> list_del(&ops->node);
> - kfree(ops);
> + kfree_rcu(ops, rcu);
> } else {
> list_del_rcu(&func->stack_node);
> }
> @@ -223,7 +223,7 @@ static int klp_patch_func(struct klp_func *func)
> err:
> list_del_rcu(&func->stack_node);
> list_del(&ops->node);
> - kfree(ops);
> + kfree_rcu(ops, rcu);
> return ret;
> }
> 
> With this the function should be safe against accessing an invalid
> pointer.
> 
>>> ops = func->ops;
>>> if (!ops) {
>>> using = 0;
>>> goto out;
>>> }
>>> 
>>> using_func = list_first_or_null_rcu(&ops->func_stack,
>>> struct klp_func, stack_node);
>>> if (func == using_func)
>>> using = 1;
>>> else
>>> using = 0;
>>> 
>>> out:
>>> rcu_read_unlock();
>>> 
>>> return sysfs_emit(buf, "%d\n", func->using);
>>> }
> 
> But the function is still not correct according the order of reading.
> A more correct solution would be something like:
> 
> static ssize_t using_show(struct kobject *kobj,
> struct kobj_attribute *attr, char *buf)
> {
> struct klp_func *func, *using_func;
> struct klp_ops *ops;
> int using;
> 
> func = container_of(kobj, struct klp_func, kobj);
> 
> rcu_read_lock();
> 
> /* This livepatch is used when the function is on top of the stack. */
> ops = func->ops;
> if (ops) {
> using_func = list_first_or_null_rcu(&ops->func_stack,
> struct klp_func, stack_node);
> if (func == using_func)
> using = 1;
> else
> using = 0;
> }
> 
> /*
>  * The function stack gives the right information only when there
>  * is no transition in progress.
>  *
>  * Make sure that we see the updated ops->func_stack when
>  * func->transition is cleared. This matches with:
>  *
>  * The write barrier in  __klp_enable_patch() between
>  * klp_init_transition() and klp_patch_object().
>  *
>  * The write barrier in  __klp_disable_patch() between
>  * klp_init_transition() and klp_start_transition().
>  *
>  * The write barrier in klp_complete_transition()
>  * between klp_unpatch_objects() and func->transition = false.
>  */
> smp_rmb();
> 
> if (func->transition)
> using = -1;
> 
> rcu_read_unlock();
> 
> return sysfs_emit(buf, "%d\n", func->using);
> }
> 
> Now, the question is whether we want to maintain such a barrier. Any
> lockless access and barrier adds a maintenance burden.
> 
> You might try to put the above into a patch see what others tell
> about it.
> 
> Best Regards,
> Petr

After the previous discussion, it seems that patch-level "order" feature
 is the more acceptable way to fulfill the patch order information.

Therefore, I am trying to go that way instead of adding "using" into klp_func.

Maybe patch-level interface will bring less maintenance burden.

Regards,
Wardenjohn.

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Josh Poimboeuf 1 year, 5 months ago

On Thu, Sep 05, 2024 at 12:23:20PM +0200, Miroslav Benes wrote:
> I am not a fan. Josh wrote most of my objections already so I will not 
> repeat them. I understand that the attribute might be useful but the 
> amount of code it adds to sensitive functions like 
> klp_complete_transition() is no fun.
> 
> Would it be possible to just use klp_transition_patch and implement the 
> logic just in using_show()?

Yes, containing the logic to the sysfs file sounds a lot better.

> I have not thought through it completely but 
> klp_transition_patch is also an indicator that there is a transition going 
> on. It is set to NULL only after all func->transition are false. So if you 
> check that, you can assign -1 in using_show() immediately and then just 
> look at the top of func_stack.

sysfs already has per-patch 'transition' and 'enabled' files so I don't
like duplicating that information.

The only thing missing is the patch stack order.  How about a simple
per-patch file which indicates that?

  /sys/kernel/livepatch/<patchA>/order => 1
  /sys/kernel/livepatch/<patchB>/order => 2

The implementation should be trivial with the use of
klp_for_each_patch() to count the patches.

-- 
Josh

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

Hi, John & Miroslav

>> 
>> Would it be possible to just use klp_transition_patch and implement the 
>> logic just in using_show()?
> 
> Yes, containing the logic to the sysfs file sounds a lot better.

Maybe I can try to use the state of klp_transition_patch to update the function's state instead of changing code in klp_complete_transition?

> 
>> I have not thought through it completely but 
>> klp_transition_patch is also an indicator that there is a transition going 
>> on. It is set to NULL only after all func->transition are false. So if you 
>> check that, you can assign -1 in using_show() immediately and then just 
>> look at the top of func_stack.
> 
> sysfs already has per-patch 'transition' and 'enabled' files so I don't
> like duplicating that information.
> 
> The only thing missing is the patch stack order.  How about a simple
> per-patch file which indicates that?
> 
>  /sys/kernel/livepatch/<patchA>/order => 1
>  /sys/kernel/livepatch/<patchB>/order => 2
> 
> The implementation should be trivial with the use of
> klp_for_each_patch() to count the patches.
> 
I think this is the second solution. It seems that adding an interface to patch level is an acceptable way. And if patch order is provided in /sys/kernel/livepatch/<patchA>/order, we should make a user space tool to calculate the function that is activate in the system. From my point to the original problem, it is more look like a workaround.

> journalctl -b ?
> Store the state in a file in /var/run?

Getting patch order from journal is the way I think not reliable. In fact, I don't recommend to get patch order in that way. ^_^

Thanks!
Wardenjohn

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Petr Mladek 1 year, 5 months ago

On Fri 2024-09-06 17:39:46, zhang warden wrote:
> Hi, John & Miroslav
> 
> >> 
> >> Would it be possible to just use klp_transition_patch and implement the 
> >> logic just in using_show()?
> > 
> > Yes, containing the logic to the sysfs file sounds a lot better.
> 
> Maybe I can try to use the state of klp_transition_patch to update the function's state instead of changing code in klp_complete_transition?
> 
> > 
> >> I have not thought through it completely but 
> >> klp_transition_patch is also an indicator that there is a transition going 
> >> on. It is set to NULL only after all func->transition are false. So if you 
> >> check that, you can assign -1 in using_show() immediately and then just 
> >> look at the top of func_stack.
> > 
> > sysfs already has per-patch 'transition' and 'enabled' files so I don't
> > like duplicating that information.
> > 
> > The only thing missing is the patch stack order.  How about a simple
> > per-patch file which indicates that?
> > 
> >  /sys/kernel/livepatch/<patchA>/order => 1
> >  /sys/kernel/livepatch/<patchB>/order => 2
> > 
> > The implementation should be trivial with the use of
> > klp_for_each_patch() to count the patches.
> > 
> I think this is the second solution. It seems that adding an
> interface to patch level is an acceptable way.

It seems to be the only acceptable idea at the moment.

> And if patch order
> is provided in /sys/kernel/livepatch/<patchA>/order, we should
> make a user space tool to calculate the function that
> is activate in the system. From my point to the original
> problem, it is more look like a workaround.

It is always a compromise between the complexity and the benefit.
Upstream will accept only changes which are worth it.

Here, the main problem is that you do not have coordinated developement
and installation of livepatches. This is dangerous and you should
not do it! Upstream will never support such a wild approach.

You could get upstream some features which would make your life
easier. But the features must be worth the effort.

Best Regards,
Petr

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

Hi, Petr

> On Sep 7, 2024, at 00:39, Petr Mladek <pmladek@suse.com> wrote:
> 
> On Fri 2024-09-06 17:39:46, zhang warden wrote:
>> Hi, John & Miroslav
>> 
>>>> 
>>>> Would it be possible to just use klp_transition_patch and implement the 
>>>> logic just in using_show()?
>>> 
>>> Yes, containing the logic to the sysfs file sounds a lot better.
>> 
>> Maybe I can try to use the state of klp_transition_patch to update the function's state instead of changing code in klp_complete_transition?
>> 
>>> 
>>>> I have not thought through it completely but 
>>>> klp_transition_patch is also an indicator that there is a transition going 
>>>> on. It is set to NULL only after all func->transition are false. So if you 
>>>> check that, you can assign -1 in using_show() immediately and then just 
>>>> look at the top of func_stack.
>>> 
>>> sysfs already has per-patch 'transition' and 'enabled' files so I don't
>>> like duplicating that information.
>>> 
>>> The only thing missing is the patch stack order.  How about a simple
>>> per-patch file which indicates that?
>>> 
>>> /sys/kernel/livepatch/<patchA>/order => 1
>>> /sys/kernel/livepatch/<patchB>/order => 2
>>> 
>>> The implementation should be trivial with the use of
>>> klp_for_each_patch() to count the patches.
>>> 
>> I think this is the second solution. It seems that adding an
>> interface to patch level is an acceptable way.
> 
> It seems to be the only acceptable idea at the moment.
> 
>> And if patch order
>> is provided in /sys/kernel/livepatch/<patchA>/order, we should
>> make a user space tool to calculate the function that
>> is activate in the system. From my point to the original
>> problem, it is more look like a workaround.
> 
> It is always a compromise between the complexity and the benefit.
> Upstream will accept only changes which are worth it.
> 
> Here, the main problem is that you do not have coordinated developement
> and installation of livepatches. This is dangerous and you should
> not do it! Upstream will never support such a wild approach.
> 
> You could get upstream some features which would make your life
> easier. But the features must be worth the effort.

Yep, I have the same idea as you here. The problem we face now as Josh describes, livepatch now missing the information of patch order. If we can have it, the rest information user need can be processed my the original information from the kernel.

My intention to introduce "using" feature is to solve the missing info of livepatch, facing the original problem I met. But as livepatch becomes more and more widely used, this is a real problem that may be widely encountered.

If maintainer thinks the way changing klp_transition_patch is not worth it. Maybe I can try another way to fix this problem. ( For example, the patch-level order interface?) :) 

From my point of view, this information have its worth it provide, what we need to consider is how to implement this function specifically. Do you agree with me?

Best Regards,
Wardenjohn.

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Miroslav Benes 1 year, 5 months ago

On Thu, 5 Sep 2024, Josh Poimboeuf wrote:

> On Thu, Sep 05, 2024 at 12:23:20PM +0200, Miroslav Benes wrote:
> > I am not a fan. Josh wrote most of my objections already so I will not 
> > repeat them. I understand that the attribute might be useful but the 
> > amount of code it adds to sensitive functions like 
> > klp_complete_transition() is no fun.
> > 
> > Would it be possible to just use klp_transition_patch and implement the 
> > logic just in using_show()?
> 
> Yes, containing the logic to the sysfs file sounds a lot better.
> 
> > I have not thought through it completely but 
> > klp_transition_patch is also an indicator that there is a transition going 
> > on. It is set to NULL only after all func->transition are false. So if you 
> > check that, you can assign -1 in using_show() immediately and then just 
> > look at the top of func_stack.
> 
> sysfs already has per-patch 'transition' and 'enabled' files so I don't
> like duplicating that information.
> 
> The only thing missing is the patch stack order.  How about a simple
> per-patch file which indicates that?
> 
>   /sys/kernel/livepatch/<patchA>/order => 1
>   /sys/kernel/livepatch/<patchB>/order => 2
> 
> The implementation should be trivial with the use of
> klp_for_each_patch() to count the patches.

Yes, it should work as well.

Wardenjohn, you should then get all the information that you need. Also, 
please test your patches with livepatch kselftests before a submission 
next time. New sysfs attributes need to be documented in 
Documentation/ABI/testing/sysfs-kernel-livepatch and there should be a new 
kselftest for them.

Thank you,
Miroslav

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

Hi Miroslav,
> 
> 
> I am not a fan. Josh wrote most of my objections already so I will not 
> repeat them. I understand that the attribute might be useful but the 
> amount of code it adds to sensitive functions like 
> klp_complete_transition() is no fun.
> 
OK, the point I make changes to klp_complete_transition is that when a transition is going to be complete, we can make sure the function state can go to an end state (0 or 1), which is the most easy way to do so...lol...

> Would it be possible to just use klp_transition_patch and implement the 
> logic just in using_show()? I have not thought through it completely but 
> klp_transition_patch is also an indicator that there is a transition going 
> on. It is set to NULL only after all func->transition are false. So if you 
> check that, you can assign -1 in using_show() immediately and then just 
> look at the top of func_stack.
> 

I will consider it later. If you have any suggestions or other solutions, please let me know.

> If possible (and there are corner cases everywhere. Just take a look at 
> barriers in all those functions.) and the resulting code is much simpler, 
> we might take it. But otherwise this should really be solved in userspace 
> using some live patch management tool as Josh said. I mean generally 
> because you have much more serious problems without it.
> 

I replied to Josh to explain my reason of not using user space tools to maintain livepatch information. Of cause, I put my patch here and tell you the problem I am facing, maybe there some people may face the same problem as me...hah...

We can discuss it, if you have a better idea for that patch, please fell free to tell me.

Also, I forgot to sign at the end of the email I sent Josh, I'm sorry...

Thanks.
Wardenjohn.

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Josh Poimboeuf 1 year, 5 months ago

On Wed, Aug 28, 2024 at 10:23:50AM +0800, Wardenjohn wrote:
> One system may contains more than one livepatch module. We can see
> which patch is enabled. If some patches applied to one system
> modifing the same function, livepatch will use the function enabled
> on top of the function stack. However, we can not excatly know
> which function of which patch is now enabling.
> 
> This patch introduce one sysfs attribute of "using" to klp_func.
> For example, if there are serval patches  make changes to function
> "meminfo_proc_show", the attribute "enabled" of all the patch is 1.
> With this attribute, we can easily know the version enabling belongs
> to which patch.
> 
> The "using" is set as three state. 0 is disabled, it means that this
> version of function is not used. 1 is running, it means that this
> version of function is now running. -1 is unknown, it means that
> this version of function is under transition, some task is still
> chaning their running version of this function.

I'm missing how this is actually useful in the real world.  It feels
like a solution in search of a problem.  And it adds significant
maintenance burden.  Why?

Do you not have any control over what order your patches are applied?
If not, that sounds dangerous and you have much bigger problems.

This "problem" needs to be managed in user space.

-- 
Josh

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

> On Sep 4, 2024, at 12:48, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> 
> On Wed, Aug 28, 2024 at 10:23:50AM +0800, Wardenjohn wrote:
>> One system may contains more than one livepatch module. We can see
>> which patch is enabled. If some patches applied to one system
>> modifing the same function, livepatch will use the function enabled
>> on top of the function stack. However, we can not excatly know
>> which function of which patch is now enabling.
>> 
>> This patch introduce one sysfs attribute of "using" to klp_func.
>> For example, if there are serval patches  make changes to function
>> "meminfo_proc_show", the attribute "enabled" of all the patch is 1.
>> With this attribute, we can easily know the version enabling belongs
>> to which patch.
>> 
>> The "using" is set as three state. 0 is disabled, it means that this
>> version of function is not used. 1 is running, it means that this
>> version of function is now running. -1 is unknown, it means that
>> this version of function is under transition, some task is still
>> chaning their running version of this function.
> 
> I'm missing how this is actually useful in the real world.  It feels
> like a solution in search of a problem.  And it adds significant
> maintenance burden.  Why?
> 
> Do you not have any control over what order your patches are applied?
> If not, that sounds dangerous and you have much bigger problems.
> 
> This "problem" needs to be managed in user space.
> 
> -- 
> Josh

Hi, Josh.

First, introducing feature "using" is just a state of one klp_function is using or not in the system.
I think this state will not bring significant maintenance burden, right?

And then, this feature can tell user that which function is now running in this system. As we know, livepatch enable many patches for one function, and the stack top function of target function is the "using" function. Here will bring us some questions: 
1. Since we patch some patches to one function, which version of this function A is my system exactly now using? 
2. One patch may contains many function fixes, the "using" version of target function belongs to which patch now? 
Livepatch now cannot tell us this information, although livepatch can do it.

In the scenario where multiple people work together to maintain the same system, or a long time running system, the patch sets will inevitably be cumulative. I think kernel can maintain and tell user this most accurate information by one sysfs interface.

A real case I met have been shown in the previous mail to explain the use in real world.

At the process I introduce this feature, we found that the function of klp_find_ops can be optimized. If we can move klp_ops structure into klp_func, we don't need to maintain one global list again. And if we want to find an klp_ops, we just need to get the klp_func (which we will already have). This will be a nice efficiency improvement.

Regards.
Wardenjohn.

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Josh Poimboeuf 1 year, 5 months ago

On Wed, Sep 04, 2024 at 02:34:44PM +0800, zhang warden wrote:
> In the scenario where multiple people work together to maintain the
> same system, or a long time running system, the patch sets will
> inevitably be cumulative. I think kernel can maintain and tell user
> this most accurate information by one sysfs interface.

If there are multiple people applying patches independently from each
other (to the same function even!), you are playing with fire, as there
could easily be implicit dependencies and conflicts between the patches.

-- 
Josh

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

> On Sep 4, 2024, at 15:14, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> 
> On Wed, Sep 04, 2024 at 02:34:44PM +0800, zhang warden wrote:
>> In the scenario where multiple people work together to maintain the
>> same system, or a long time running system, the patch sets will
>> inevitably be cumulative. I think kernel can maintain and tell user
>> this most accurate information by one sysfs interface.
> 
> If there are multiple people applying patches independently from each
> other (to the same function even!), you are playing with fire, as there
> could easily be implicit dependencies and conflicts between the patches.
> 
> 
Yep, I agree with you. This is not a good practice.

But we can work further, livepatch can tell which function is now running. This feature can do more than that.

Afterall, users alway want to know if their newly patched function successfully enabled and using to fix the bug-existed kernel function.

With this feature, user can confirm their patch is successfully running instead of using crash to look into /proc/kcore to make sure this function is running now. (I always use this method to check my function patched ... lol).

And I think further, if we use kpatch-build[1], `kpatch list` can not only tell us which patch is enabled, but also tell us the relationship between running function and patched module.
I think this is an exciting feature...

[1] https://github.com/dynup/kpatch.git

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Josh Poimboeuf 1 year, 5 months ago

On Wed, Sep 04, 2024 at 03:30:22PM +0800, zhang warden wrote:
> > On Sep 4, 2024, at 15:14, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > If there are multiple people applying patches independently from each
> > other (to the same function even!), you are playing with fire, as there
> > could easily be implicit dependencies and conflicts between the patches.
> > 
> > 
> Yep, I agree with you. This is not a good practice.
> 
> But we can work further, livepatch can tell which function is now running. This feature can do more than that.
> 
> Afterall, users alway want to know if their newly patched function successfully enabled and using to fix the bug-existed kernel function.
> 
> With this feature, user can confirm their patch is successfully running instead of using crash to look into /proc/kcore to make sure this function is running now. (I always use this method to check my function patched ... lol).
> 
> And I think further, if we use kpatch-build[1], `kpatch list` can not only tell us which patch is enabled, but also tell us the relationship between running function and patched module.
> I think this is an exciting feature...

Most of this information is already available in sysfs, with the
exception of patch stacking order.

Every new feature adds maintenance burden.  It might not seem like much
to you, but the features add up, and livepatch needs to be solid.

We want patches that fix real world, tangible problems, not theoretical
problems that it *might* solve for a hypothetical user.

What is the motiviation behind this patch?  What real world problem does
it fix for you, or an actual user?  Have you considered other solutions,
like more organized patch management in user space?

-- 
Josh

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago

Hi, Josh.
> Most of this information is already available in sysfs, with the
> exception of patch stacking order.
> 
Well, this is the problem my patch want to fix. But my patch is more simpler, it just shows the stack top of the target function, which is the only thing users care.

> 
> We want patches that fix real world, tangible problems, not theoretical
> problems that it *might* solve for a hypothetical user.
> 
> What is the motiviation behind this patch?  What real world problem does
> it fix for you, or an actual user?  

Here I can give you an example as I previous described:

>>Here I can give you an example.
We are going to fix a problem of io_uring.
Our team made a livepatch of io_sq_offload_create.
This livepatch module is deployed to some running servers.

Then, another team make some change to the same function and deployed it to the same cluster.

Finally, they found that there are some livepatch module modifying the same function io_sq_offload_create. But none of them can tell which version of io_sq_offload_create is now exactly running in the system.

We can only use crash to debug /proc/kcore to see if we can get more information from the kcore.

If livepatch can tell which version of the function is now running or going to run, it will be very useful.

>>>>>>
What's more, the scenario we easily face is that for the confidential environment, the system maintenance mainly depends on SREs. Different team may do bug fix or performance optimization to kernel function. 

Here usually some SREs comes to me and ask me how to make sure which version is now actually active because tow teams make tow livepatch modules, both of them make changes to one function. 

He wants to know if his system is under risk, he want the system run the right version of the function because one module is a bug fix and the other is just a performance optimization module, at this time, the bug fix version is much more important. dmesg is too long, he find it hard to find out the patch order from dmesg.

With this patch, he can just cat /sys/kernel/livepatch/<module>/<object>/<function>/using and get his answer.

> Have you considered other solutions,
> like more organized patch management in user space?

User space solutions seems unreliable. What we need is just the enabling version of target function. The order of livepatch module enable mainly from dmesg, which is easily flush away or being cleaned.

If we use an user space program to maintain the information of patch order, once the program is killed, the information is loss either.

Neither of the previous user space solutions seems reliable. Only kernel space will no one can change it. And it is the most accurate.

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by Josh Poimboeuf 1 year, 5 months ago

On Thu, Sep 05, 2024 at 10:03:52PM +0800, zhang warden wrote:
> >>Here I can give you an example.
> We are going to fix a problem of io_uring.
> Our team made a livepatch of io_sq_offload_create.
> This livepatch module is deployed to some running servers.
> 
> Then, another team make some change to the same function and deployed it to the same cluster.
> 
> Finally, they found that there are some livepatch module modifying the same function io_sq_offload_create. But none of them can tell which version of io_sq_offload_create is now exactly running in the system.
> 
> We can only use crash to debug /proc/kcore to see if we can get more information from the kcore.
> 
> If livepatch can tell which version of the function is now running or going to run, it will be very useful.
> 
> >>>>>>
> What's more, the scenario we easily face is that for the confidential environment, the system maintenance mainly depends on SREs. Different team may do bug fix or performance optimization to kernel function. 
> 
> Here usually some SREs comes to me and ask me how to make sure which version is now actually active because tow teams make tow livepatch modules, both of them make changes to one function. 
> 
> He wants to know if his system is under risk, he want the system run the right version of the function because one module is a bug fix and the other is just a performance optimization module, at this time, the bug fix version is much more important. dmesg is too long, he find it hard to find out the patch order from dmesg.
> 
> With this patch, he can just cat /sys/kernel/livepatch/<module>/<object>/<function>/using and get his answer.

Thanks for the details, and sorry if I missed it before.  This would
have been helpful to have in the cover letter.

But again I want to stress that livepatching should be done with care.
Having different teams applying patches without coordination is not
recommended.  The teams' processes and/or tooling really need to be
improved.

> > Have you considered other solutions,
> > like more organized patch management in user space?
> 
> User space solutions seems unreliable. What we need is just the enabling version of target function. The order of livepatch module enable mainly from dmesg, which is easily flush away or being cleaned.

journalctl -b ?

> If we use an user space program to maintain the information of patch order, once the program is killed, the information is loss either.

Store the state in a file in /var/run?

-- 
Josh

Re: [PATCH v4 2/2] livepatch: Add using attribute to klp_func for using function show

Posted by zhang warden 1 year, 5 months ago


> On Aug 28, 2024, at 10:23, Wardenjohn <zhangwarden@gmail.com> wrote:
> 
> One system may contains more than one livepatch module. We can see
> which patch is enabled. If some patches applied to one system
> modifing the same function, livepatch will use the function enabled
> on top of the function stack. However, we can not excatly know
> which function of which patch is now enabling.
> 
> This patch introduce one sysfs attribute of "using" to klp_func.
> For example, if there are serval patches  make changes to function
> "meminfo_proc_show", the attribute "enabled" of all the patch is 1.
> With this attribute, we can easily know the version enabling belongs
> to which patch.
> 
> The "using" is set as three state. 0 is disabled, it means that this
> version of function is not used. 1 is running, it means that this
> version of function is now running. -1 is unknown, it means that
> this version of function is under transition, some task is still
> chaning their running version of this function.
> 
> cat /sys/kernel/livepatch/<patch1>/<object1>/<function1,sympos>/using -> 0
> means that the function1 of patch1 is disabled.
> 
> cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> 1
> means that the function1 of patchN is enabled.
> 
> cat /sys/kernel/livepatch/<patchN>/<object1>/<function1,sympos>/using -> -1
> means that the function1 of patchN is under transition and unknown.
> 
> Signed-off-by: Wardenjohn <zhangwarden@gmail.com>
> 
> 

Hi, Maintainers.

How about this patch? I am looking for your suggestions.

Regards,
Wardenjohn.