[PATCH v2] xen/sched: fix error handling in cpu_schedule_up()

Juergen Gross posted 1 patch 2 months, 1 week ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20240626055425.3622-1-jgross@suse.com
xen/common/sched/core.c | 63 +++++++++++++++++++++--------------------
1 file changed, 33 insertions(+), 30 deletions(-)
[PATCH v2] xen/sched: fix error handling in cpu_schedule_up()
Posted by Juergen Gross 2 months, 1 week ago
In case cpu_schedule_up() is failing, it needs to undo all externally
visible changes it has done before.

Reason is that cpu_schedule_callback() won't be called with the
CPU_UP_CANCELED notifier in case cpu_schedule_up() did fail.

Reported-by: Jan Beulich <jbeulich@suse.com>
Fixes: 207589dbacd4 ("xen/sched: move per cpu scheduler private data into struct sched_resource")
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- undo changes in cpu_schedule_up() in case of failure
---
 xen/common/sched/core.c | 63 +++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index d84b65f197..c466711e9e 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -2755,6 +2755,36 @@ static struct sched_resource *sched_alloc_res(void)
     return sr;
 }
 
+static void cf_check sched_res_free(struct rcu_head *head)
+{
+    struct sched_resource *sr = container_of(head, struct sched_resource, rcu);
+
+    free_cpumask_var(sr->cpus);
+    if ( sr->sched_unit_idle )
+        sched_free_unit_mem(sr->sched_unit_idle);
+    xfree(sr);
+}
+
+static void cpu_schedule_down(unsigned int cpu)
+{
+    struct sched_resource *sr;
+
+    rcu_read_lock(&sched_res_rculock);
+
+    sr = get_sched_res(cpu);
+
+    kill_timer(&sr->s_timer);
+
+    cpumask_clear_cpu(cpu, &sched_res_mask);
+    set_sched_res(cpu, NULL);
+
+    /* Keep idle unit. */
+    sr->sched_unit_idle = NULL;
+    call_rcu(&sr->rcu, sched_res_free);
+
+    rcu_read_unlock(&sched_res_rculock);
+}
+
 static int cpu_schedule_up(unsigned int cpu)
 {
     struct sched_resource *sr;
@@ -2794,7 +2824,10 @@ static int cpu_schedule_up(unsigned int cpu)
         idle_vcpu[cpu]->sched_unit->res = sr;
 
     if ( idle_vcpu[cpu] == NULL )
+    {
+        cpu_schedule_down(cpu);
         return -ENOMEM;
+    }
 
     idle_vcpu[cpu]->sched_unit->rendezvous_in_cnt = 0;
 
@@ -2812,36 +2845,6 @@ static int cpu_schedule_up(unsigned int cpu)
     return 0;
 }
 
-static void cf_check sched_res_free(struct rcu_head *head)
-{
-    struct sched_resource *sr = container_of(head, struct sched_resource, rcu);
-
-    free_cpumask_var(sr->cpus);
-    if ( sr->sched_unit_idle )
-        sched_free_unit_mem(sr->sched_unit_idle);
-    xfree(sr);
-}
-
-static void cpu_schedule_down(unsigned int cpu)
-{
-    struct sched_resource *sr;
-
-    rcu_read_lock(&sched_res_rculock);
-
-    sr = get_sched_res(cpu);
-
-    kill_timer(&sr->s_timer);
-
-    cpumask_clear_cpu(cpu, &sched_res_mask);
-    set_sched_res(cpu, NULL);
-
-    /* Keep idle unit. */
-    sr->sched_unit_idle = NULL;
-    call_rcu(&sr->rcu, sched_res_free);
-
-    rcu_read_unlock(&sched_res_rculock);
-}
-
 void sched_rm_cpu(unsigned int cpu)
 {
     int rc;
-- 
2.35.3
Re: [PATCH v2] xen/sched: fix error handling in cpu_schedule_up()
Posted by Jan Beulich 2 months, 1 week ago
On 26.06.2024 07:54, Juergen Gross wrote:
> In case cpu_schedule_up() is failing, it needs to undo all externally
> visible changes it has done before.
> 
> Reason is that cpu_schedule_callback() won't be called with the
> CPU_UP_CANCELED notifier in case cpu_schedule_up() did fail.
> 
> Reported-by: Jan Beulich <jbeulich@suse.com>
> Fixes: 207589dbacd4 ("xen/sched: move per cpu scheduler private data into struct sched_resource")
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
with two questions, just for my own reassurance:

> --- a/xen/common/sched/core.c
> +++ b/xen/common/sched/core.c
> @@ -2755,6 +2755,36 @@ static struct sched_resource *sched_alloc_res(void)
>      return sr;
>  }
>  
> +static void cf_check sched_res_free(struct rcu_head *head)
> +{
> +    struct sched_resource *sr = container_of(head, struct sched_resource, rcu);
> +
> +    free_cpumask_var(sr->cpus);
> +    if ( sr->sched_unit_idle )
> +        sched_free_unit_mem(sr->sched_unit_idle);
> +    xfree(sr);
> +}
> +
> +static void cpu_schedule_down(unsigned int cpu)
> +{
> +    struct sched_resource *sr;
> +
> +    rcu_read_lock(&sched_res_rculock);
> +
> +    sr = get_sched_res(cpu);
> +
> +    kill_timer(&sr->s_timer);
> +
> +    cpumask_clear_cpu(cpu, &sched_res_mask);
> +    set_sched_res(cpu, NULL);
> +
> +    /* Keep idle unit. */
> +    sr->sched_unit_idle = NULL;
> +    call_rcu(&sr->rcu, sched_res_free);
> +
> +    rcu_read_unlock(&sched_res_rculock);
> +}

Eyeballing suggests these two functions don't change at all; they're solely
being moved up?

Also, for the purpose here, use of RCU to effect the freeing isn't strictly
necessary. It's just that it also doesn't hurt doing it that way, and it
would be more code to directly free when coming from cpu_schedule_up()?

Jan
Re: [PATCH v2] xen/sched: fix error handling in cpu_schedule_up()
Posted by Jürgen Groß 2 months, 1 week ago
On 26.06.24 11:02, Jan Beulich wrote:
> On 26.06.2024 07:54, Juergen Gross wrote:
>> In case cpu_schedule_up() is failing, it needs to undo all externally
>> visible changes it has done before.
>>
>> Reason is that cpu_schedule_callback() won't be called with the
>> CPU_UP_CANCELED notifier in case cpu_schedule_up() did fail.
>>
>> Reported-by: Jan Beulich <jbeulich@suse.com>
>> Fixes: 207589dbacd4 ("xen/sched: move per cpu scheduler private data into struct sched_resource")
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> with two questions, just for my own reassurance:
> 
>> --- a/xen/common/sched/core.c
>> +++ b/xen/common/sched/core.c
>> @@ -2755,6 +2755,36 @@ static struct sched_resource *sched_alloc_res(void)
>>       return sr;
>>   }
>>   
>> +static void cf_check sched_res_free(struct rcu_head *head)
>> +{
>> +    struct sched_resource *sr = container_of(head, struct sched_resource, rcu);
>> +
>> +    free_cpumask_var(sr->cpus);
>> +    if ( sr->sched_unit_idle )
>> +        sched_free_unit_mem(sr->sched_unit_idle);
>> +    xfree(sr);
>> +}
>> +
>> +static void cpu_schedule_down(unsigned int cpu)
>> +{
>> +    struct sched_resource *sr;
>> +
>> +    rcu_read_lock(&sched_res_rculock);
>> +
>> +    sr = get_sched_res(cpu);
>> +
>> +    kill_timer(&sr->s_timer);
>> +
>> +    cpumask_clear_cpu(cpu, &sched_res_mask);
>> +    set_sched_res(cpu, NULL);
>> +
>> +    /* Keep idle unit. */
>> +    sr->sched_unit_idle = NULL;
>> +    call_rcu(&sr->rcu, sched_res_free);
>> +
>> +    rcu_read_unlock(&sched_res_rculock);
>> +}
> 
> Eyeballing suggests these two functions don't change at all; they're solely
> being moved up?

Correct.

> Also, for the purpose here, use of RCU to effect the freeing isn't strictly
> necessary. It's just that it also doesn't hurt doing it that way, and it
> would be more code to directly free when coming from cpu_schedule_up()?

Yes.


Juergen