The bitmap_for_each() loops in the vGIC MMIO write handlers all iterate over a
scalar value, and don't need to call general bitop helpers behind the scenes.
Use for_each_set_bit() instead.
Clamp data to the width of the access in dispatch_mmio_write(), rather than
doing so in every handler.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
v3:
* Split out of series.
* Move clamping to common location.
Bloat-o-meter analysis. The negative change in each handler also includes the
removal of calls to _find_{first,next}_bit_le(), so the improvement is better
than the summary suggests.
$ ../scripts/bloat-o-meter xen-syms-arm32-{before,after}
add/remove: 0/0 grow/shrink: 1/6 up/down: 24/-156 (-132)
Function                                     old     new   delta
dispatch_mmio_write                          212     236     +24
vgic_mmio_write_spending                     392     368     -24
vgic_mmio_write_senable                      324     300     -24
vgic_mmio_write_cenable                      320     296     -24
vgic_mmio_write_sactive                      204     176     -28
vgic_mmio_write_cpending                     464     436     -28
vgic_mmio_write_cactive                      204     176     -28
$ ../scripts/bloat-o-meter xen-syms-arm64-{before,after}
add/remove: 0/0 grow/shrink: 1/6 up/down: 32/-196 (-164)
Function                                     old     new   delta
dispatch_mmio_write                          164     196     +32
vgic_mmio_write_spending                     320     312      -8
vgic_mmio_write_cpending                     368     356     -12
vgic_mmio_write_sactive                      192     156     -36
vgic_mmio_write_cactive                      192     156     -36
vgic_mmio_write_cenable                      316     268     -48
vgic_mmio_write_senable                      320     264     -56
---
xen/arch/arm/vgic/vgic-mmio.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/xen/arch/arm/vgic/vgic-mmio.c b/xen/arch/arm/vgic/vgic-mmio.c
index 4ad350c21c8b..b4ba34cfc368 100644
--- a/xen/arch/arm/vgic/vgic-mmio.c
+++ b/xen/arch/arm/vgic/vgic-mmio.c
@@ -70,9 +70,8 @@ void vgic_mmio_write_senable(struct vcpu *vcpu,
unsigned long val)
{
uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
- unsigned int i;
- bitmap_for_each ( i, &val, len * 8 )
+ for_each_set_bit ( i, val )
{
struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
unsigned long flags;
@@ -115,9 +114,8 @@ void vgic_mmio_write_cenable(struct vcpu *vcpu,
unsigned long val)
{
uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
- unsigned int i;
- bitmap_for_each ( i, &val, len * 8 )
+ for_each_set_bit ( i, val )
{
struct vgic_irq *irq;
unsigned long flags;
@@ -183,11 +181,10 @@ void vgic_mmio_write_spending(struct vcpu *vcpu,
unsigned long val)
{
uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
- unsigned int i;
unsigned long flags;
irq_desc_t *desc;
- bitmap_for_each ( i, &val, len * 8 )
+ for_each_set_bit ( i, val )
{
struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
@@ -231,11 +228,10 @@ void vgic_mmio_write_cpending(struct vcpu *vcpu,
unsigned long val)
{
uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
- unsigned int i;
unsigned long flags;
irq_desc_t *desc;
- bitmap_for_each ( i, &val, len * 8 )
+ for_each_set_bit ( i, val )
{
struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
@@ -327,9 +323,8 @@ void vgic_mmio_write_cactive(struct vcpu *vcpu,
unsigned long val)
{
uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
- unsigned int i;
- bitmap_for_each ( i, &val, len * 8 )
+ for_each_set_bit ( i, val )
{
struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
@@ -357,9 +352,8 @@ void vgic_mmio_write_sactive(struct vcpu *vcpu,
unsigned long val)
{
uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
- unsigned int i;
- bitmap_for_each ( i, &val, len * 8 )
+ for_each_set_bit ( i, val )
{
struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
@@ -591,6 +585,10 @@ static int dispatch_mmio_write(struct vcpu *vcpu, mmio_info_t *info,
if ( !region )
return 0;
+ /* Clamp data to the width of the access. */
+ if ( len < sizeof(data) )
+ data &= (1UL << (len * 8)) - 1;
+
switch (iodev->iodev_type)
{
case IODEV_DIST:
--
2.39.5
On Thu, 27 Mar 2025, Andrew Cooper wrote:
> These are all loops over a scalar value, and don't need to call general bitop
> helpers behind the scenes.
>
> Clamp data to the width of the access in dispatch_mmio_write(), rather than
> doing so in every handler.
>
> No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
> ---
> CC: Stefano Stabellini <sstabellini@kernel.org>
> CC: Julien Grall <julien@xen.org>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> CC: Bertrand Marquis <bertrand.marquis@arm.com>
> CC: Michal Orzel <michal.orzel@amd.com>
>
> v3:
> * Split out of series.
> * Move clamping to common location.
>
> Bloat-o-meter analysis. The negative change in each handler also includes the
> removal of calls to _find_{first,next}_bit_le(), so the improvement is better
> than the summary suggests.
>
> $ ../scripts/bloat-o-meter xen-syms-arm32-{before,after}
> add/remove: 0/0 grow/shrink: 1/6 up/down: 24/-156 (-132)
> Function old new delta
> dispatch_mmio_write 212 236 +24
> vgic_mmio_write_spending 392 368 -24
> vgic_mmio_write_senable 324 300 -24
> vgic_mmio_write_cenable 320 296 -24
> vgic_mmio_write_sactive 204 176 -28
> vgic_mmio_write_cpending 464 436 -28
> vgic_mmio_write_cactive 204 176 -28
>
> $ ../scripts/bloat-o-meter xen-syms-arm64-{before,after}
> add/remove: 0/0 grow/shrink: 1/6 up/down: 32/-196 (-164)
> Function old new delta
> dispatch_mmio_write 164 196 +32
> vgic_mmio_write_spending 320 312 -8
> vgic_mmio_write_cpending 368 356 -12
> vgic_mmio_write_sactive 192 156 -36
> vgic_mmio_write_cactive 192 156 -36
> vgic_mmio_write_cenable 316 268 -48
> vgic_mmio_write_senable 320 264 -56
> ---
> xen/arch/arm/vgic/vgic-mmio.c | 22 ++++++++++------------
> 1 file changed, 10 insertions(+), 12 deletions(-)
>
> diff --git a/xen/arch/arm/vgic/vgic-mmio.c b/xen/arch/arm/vgic/vgic-mmio.c
> index 4ad350c21c8b..b4ba34cfc368 100644
> --- a/xen/arch/arm/vgic/vgic-mmio.c
> +++ b/xen/arch/arm/vgic/vgic-mmio.c
> @@ -70,9 +70,8 @@ void vgic_mmio_write_senable(struct vcpu *vcpu,
> unsigned long val)
> {
> uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> - unsigned int i;
>
> - bitmap_for_each ( i, &val, len * 8 )
> + for_each_set_bit ( i, val )
> {
> struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
> unsigned long flags;
> @@ -115,9 +114,8 @@ void vgic_mmio_write_cenable(struct vcpu *vcpu,
> unsigned long val)
> {
> uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> - unsigned int i;
>
> - bitmap_for_each ( i, &val, len * 8 )
> + for_each_set_bit ( i, val )
> {
> struct vgic_irq *irq;
> unsigned long flags;
> @@ -183,11 +181,10 @@ void vgic_mmio_write_spending(struct vcpu *vcpu,
> unsigned long val)
> {
> uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> - unsigned int i;
> unsigned long flags;
> irq_desc_t *desc;
>
> - bitmap_for_each ( i, &val, len * 8 )
> + for_each_set_bit ( i, val )
> {
> struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>
> @@ -231,11 +228,10 @@ void vgic_mmio_write_cpending(struct vcpu *vcpu,
> unsigned long val)
> {
> uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> - unsigned int i;
> unsigned long flags;
> irq_desc_t *desc;
>
> - bitmap_for_each ( i, &val, len * 8 )
> + for_each_set_bit ( i, val )
> {
> struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>
> @@ -327,9 +323,8 @@ void vgic_mmio_write_cactive(struct vcpu *vcpu,
> unsigned long val)
> {
> uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> - unsigned int i;
>
> - bitmap_for_each ( i, &val, len * 8 )
> + for_each_set_bit ( i, val )
> {
> struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>
> @@ -357,9 +352,8 @@ void vgic_mmio_write_sactive(struct vcpu *vcpu,
> unsigned long val)
> {
> uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> - unsigned int i;
>
> - bitmap_for_each ( i, &val, len * 8 )
> + for_each_set_bit ( i, val )
> {
> struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>
> @@ -591,6 +585,10 @@ static int dispatch_mmio_write(struct vcpu *vcpu, mmio_info_t *info,
> if ( !region )
> return 0;
>
> + /* Clamp data to the width of the access. */
> + if ( len < sizeof(data) )
> + data &= (1UL << (len * 8)) - 1;
> +
> switch (iodev->iodev_type)
> {
> case IODEV_DIST:
> --
> 2.39.5
>
© 2016 - 2026 Red Hat, Inc.