This is nasty.
https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215
When preprocessed, we get:
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 0a83f237259f..6b8d3660240a 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops,
unsigned int cpu)
write_lock_irqsave(&prv->lock, flags);
rqd_ins = &prv->rql;
+
+#if 0
list_for_each_entry ( rqd, &prv->rql, rql )
+#else
+ for ( (rqd) = ({
+ typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+ ((&prv->rql)->next);
+ (typeof(*(rqd)) *)
+ ((char *)__mptr -
+ __builtin_offsetof(typeof(*(rqd)),rql) );
+ });
+ &(rqd)->rql != // <-- problem expression
+ (&prv->rql);
+ (rqd) = ({
+ typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+ ((rqd)->rql.next);
+ (typeof(*(rqd)) *)
+ ((char *)__mptr -
+ __builtin_offsetof(typeof(*(rqd)),rql) );
+ })
+ )
+#endif
{
/* Remember first unused queue index. */
if ( !rqi_unused && rqd->id > rqi )
The alignment of csched2_runqueue_data is 8, while csched2_private is 4.
priv's list_head for rql is at +28 (+0x1c), and list_for_each_entry()
performs a buggily-typed container_of(), treating a csched2_private as
if it were csched2_runqueue_data.
It functions because it's only an address equality check, but it's also
why UBSAN objects.
This seems to fix the issue:
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 6b8d3660240a..ab938942d75f 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -537,7 +537,8 @@ struct csched2_private {
unsigned int ratelimit_us; /* Rate limiting for this
scheduler */
unsigned int active_queues; /* Number of active
runqueues */
- struct list_head rql; /* List of
runqueues */
+ struct list_head rql /* List of
runqueues */
+ __aligned(alignof(struct csched2_runqueue_data));
cpumask_t initialized; /* CPUs part of this
scheduler */
struct list_head sdom; /* List of domains (for debug
key) */
but it's obviously not a viable fix. I can't help feeling that the bug
is really in the list macros.
~Andrew
On 15.02.25 00:36, Andrew Cooper wrote:
> This is nasty.
>
> https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215
>
> When preprocessed, we get:
>
> diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
> index 0a83f237259f..6b8d3660240a 100644
> --- a/xen/common/sched/credit2.c
> +++ b/xen/common/sched/credit2.c
> @@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops,
> unsigned int cpu)
> write_lock_irqsave(&prv->lock, flags);
>
> rqd_ins = &prv->rql;
> +
> +#if 0
> list_for_each_entry ( rqd, &prv->rql, rql )
> +#else
> + for ( (rqd) = ({
> + typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
> + ((&prv->rql)->next);
> + (typeof(*(rqd)) *)
> + ((char *)__mptr -
> + __builtin_offsetof(typeof(*(rqd)),rql) );
> + });
> + &(rqd)->rql != // <-- problem expression
> + (&prv->rql);
> + (rqd) = ({
> + typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
> + ((rqd)->rql.next);
> + (typeof(*(rqd)) *)
> + ((char *)__mptr -
> + __builtin_offsetof(typeof(*(rqd)),rql) );
> + })
> + )
> +#endif
> {
> /* Remember first unused queue index. */
> if ( !rqi_unused && rqd->id > rqi )
>
>
> The alignment of csched2_runqueue_data is 8, while csched2_private is 4.
>
> priv's list_head for rql is at +28 (+0x1c), and list_for_each_entry()
> performs a buggily-typed container_of(), treating a csched2_private as
> if it were csched2_runqueue_data.
No, I don't think so. It just compares the addresses of 2 struct list_head.
1 in csched2_runqueue_data and 1 in csched2_private.
> It functions because it's only an address equality check, but it's also
> why UBSAN objects.
struct list_head should require only 4 byte alignment, so I don't see why
this would trigger UBSAN. Could it be that UBSAN has a bug here?
Juergen
On 15.02.25 12:14, Jürgen Groß wrote:
> On 15.02.25 00:36, Andrew Cooper wrote:
>> This is nasty.
>>
>> https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215
>>
>> When preprocessed, we get:
>>
>> diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
>> index 0a83f237259f..6b8d3660240a 100644
>> --- a/xen/common/sched/credit2.c
>> +++ b/xen/common/sched/credit2.c
>> @@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops,
>> unsigned int cpu)
>> write_lock_irqsave(&prv->lock, flags);
>> rqd_ins = &prv->rql;
>> +
>> +#if 0
>> list_for_each_entry ( rqd, &prv->rql, rql )
>> +#else
>> + for ( (rqd) = ({
>> + typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
>> + ((&prv->rql)->next);
>> + (typeof(*(rqd)) *)
>> + ((char *)__mptr -
>> + __builtin_offsetof(typeof(*(rqd)),rql) );
>> + });
>> + &(rqd)->rql != // <-- problem expression
>> + (&prv->rql);
>> + (rqd) = ({
>> + typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
>> + ((rqd)->rql.next);
>> + (typeof(*(rqd)) *)
>> + ((char *)__mptr -
>> + __builtin_offsetof(typeof(*(rqd)),rql) );
>> + })
>> + )
>> +#endif
>> {
>> /* Remember first unused queue index. */
>> if ( !rqi_unused && rqd->id > rqi )
>>
>>
>> The alignment of csched2_runqueue_data is 8, while csched2_private is 4.
>>
>> priv's list_head for rql is at +28 (+0x1c), and list_for_each_entry()
>> performs a buggily-typed container_of(), treating a csched2_private as
>> if it were csched2_runqueue_data.
>
> No, I don't think so. It just compares the addresses of 2 struct list_head.
> 1 in csched2_runqueue_data and 1 in csched2_private.
>
>> It functions because it's only an address equality check, but it's also
>> why UBSAN objects.
>
> struct list_head should require only 4 byte alignment, so I don't see why
> this would trigger UBSAN. Could it be that UBSAN has a bug here?
Ah, meanwhile I've got it.
I think we want something like:
#define list_for_each_entry(pos, head, member) \
for ((pos) = list_empty(head) ? NULL : list_entry((head)->next, \
typeof(*(pos)), member); \
pos; \
(pos) = list_is_last((pos)->member) ? NULL : \
list_entry((pos)->member.next, typeof(*(pos)), member))
Juergen
© 2016 - 2025 Red Hat, Inc.