ARM/vgic: Use for_each_set_bit() in vgic-mmio*

[PATCH v3] ARM/vgic: Use for_each_set_bit() in vgic-mmio*

Posted by Andrew Cooper 10 months, 2 weeks ago

The bitmap_for_each() loops in the vgic-mmio write handlers all iterate over a
scalar value, and don't need to call general bitop helpers behind the scenes.

Clamp data to the width of the access in dispatch_mmio_write(), rather than
doing so in every handler.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>

v3:
 * Split out of series.
 * Move clamping to common location.

Bloat-o-meter analysis follows.  The size reduction in each handler also
reflects the removal of calls to _find_{first,next}_bit_le(), so the
improvement is better than the summary alone suggests.

$ ../scripts/bloat-o-meter xen-syms-arm32-{before,after}
add/remove: 0/0 grow/shrink: 1/6 up/down: 24/-156 (-132)
Function                                     old     new   delta
dispatch_mmio_write                          212     236     +24
vgic_mmio_write_spending                     392     368     -24
vgic_mmio_write_senable                      324     300     -24
vgic_mmio_write_cenable                      320     296     -24
vgic_mmio_write_sactive                      204     176     -28
vgic_mmio_write_cpending                     464     436     -28
vgic_mmio_write_cactive                      204     176     -28

$ ../scripts/bloat-o-meter xen-syms-arm64-{before,after}
add/remove: 0/0 grow/shrink: 1/6 up/down: 32/-196 (-164)
Function                                     old     new   delta
dispatch_mmio_write                          164     196     +32
vgic_mmio_write_spending                     320     312      -8
vgic_mmio_write_cpending                     368     356     -12
vgic_mmio_write_sactive                      192     156     -36
vgic_mmio_write_cactive                      192     156     -36
vgic_mmio_write_cenable                      316     268     -48
vgic_mmio_write_senable                      320     264     -56
---
 xen/arch/arm/vgic/vgic-mmio.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/xen/arch/arm/vgic/vgic-mmio.c b/xen/arch/arm/vgic/vgic-mmio.c
index 4ad350c21c8b..b4ba34cfc368 100644
--- a/xen/arch/arm/vgic/vgic-mmio.c
+++ b/xen/arch/arm/vgic/vgic-mmio.c
@@ -70,9 +70,8 @@ void vgic_mmio_write_senable(struct vcpu *vcpu,
                              unsigned long val)
 {
     uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
-    unsigned int i;
 
-    bitmap_for_each ( i, &val, len * 8 )
+    for_each_set_bit ( i, val )
     {
         struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
         unsigned long flags;
@@ -115,9 +114,8 @@ void vgic_mmio_write_cenable(struct vcpu *vcpu,
                              unsigned long val)
 {
     uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
-    unsigned int i;
 
-    bitmap_for_each ( i, &val, len * 8 )
+    for_each_set_bit ( i, val )
     {
         struct vgic_irq *irq;
         unsigned long flags;
@@ -183,11 +181,10 @@ void vgic_mmio_write_spending(struct vcpu *vcpu,
                               unsigned long val)
 {
     uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
-    unsigned int i;
     unsigned long flags;
     irq_desc_t *desc;
 
-    bitmap_for_each ( i, &val, len * 8 )
+    for_each_set_bit ( i, val )
     {
         struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
 
@@ -231,11 +228,10 @@ void vgic_mmio_write_cpending(struct vcpu *vcpu,
                               unsigned long val)
 {
     uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
-    unsigned int i;
     unsigned long flags;
     irq_desc_t *desc;
 
-    bitmap_for_each ( i, &val, len * 8 )
+    for_each_set_bit ( i, val )
     {
         struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
 
@@ -327,9 +323,8 @@ void vgic_mmio_write_cactive(struct vcpu *vcpu,
                              unsigned long val)
 {
     uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
-    unsigned int i;
 
-    bitmap_for_each ( i, &val, len * 8 )
+    for_each_set_bit ( i, val )
     {
         struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
 
@@ -357,9 +352,8 @@ void vgic_mmio_write_sactive(struct vcpu *vcpu,
                              unsigned long val)
 {
     uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
-    unsigned int i;
 
-    bitmap_for_each ( i, &val, len * 8 )
+    for_each_set_bit ( i, val )
     {
         struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
 
@@ -591,6 +585,10 @@ static int dispatch_mmio_write(struct vcpu *vcpu, mmio_info_t *info,
     if ( !region )
         return 0;
 
+    /* Clamp data to the width of the access. */
+    if ( len < sizeof(data) )
+        data &= (1UL << (len * 8)) - 1;
+
     switch (iodev->iodev_type)
     {
     case IODEV_DIST:
-- 
2.39.5

Re: [PATCH v3] ARM/vgic: Use for_each_set_bit() in vgic-mmio*

Posted by Stefano Stabellini 10 months, 2 weeks ago

On Thu, 27 Mar 2025, Andrew Cooper wrote:
> These are all loops over a scalar value, and don't need to call general bitop
> helpers behind the scenes.
> 
> Clamp data to the width of the access in dispatch_mmio_write(), rather than
> doing so in every handler.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
> CC: Stefano Stabellini <sstabellini@kernel.org>
> CC: Julien Grall <julien@xen.org>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> CC: Bertrand Marquis <bertrand.marquis@arm.com>
> CC: Michal Orzel <michal.orzel@amd.com>
> 
> v3:
>  * Split out of series.
>  * Move clamping to common location.
> 
> Bloat-o-meter analysis.  The negative change in each handler also includes the
> removal of calls to _find_{first,next}_bit_le(), so the improvement is better
> than the summary suggests.
> 
> $ ../scripts/bloat-o-meter xen-syms-arm32-{before,after}
> add/remove: 0/0 grow/shrink: 1/6 up/down: 24/-156 (-132)
> Function                                     old     new   delta
> dispatch_mmio_write                          212     236     +24
> vgic_mmio_write_spending                     392     368     -24
> vgic_mmio_write_senable                      324     300     -24
> vgic_mmio_write_cenable                      320     296     -24
> vgic_mmio_write_sactive                      204     176     -28
> vgic_mmio_write_cpending                     464     436     -28
> vgic_mmio_write_cactive                      204     176     -28
> 
> $ ../scripts/bloat-o-meter xen-syms-arm64-{before,after}
> add/remove: 0/0 grow/shrink: 1/6 up/down: 32/-196 (-164)
> Function                                     old     new   delta
> dispatch_mmio_write                          164     196     +32
> vgic_mmio_write_spending                     320     312      -8
> vgic_mmio_write_cpending                     368     356     -12
> vgic_mmio_write_sactive                      192     156     -36
> vgic_mmio_write_cactive                      192     156     -36
> vgic_mmio_write_cenable                      316     268     -48
> vgic_mmio_write_senable                      320     264     -56
> ---
>  xen/arch/arm/vgic/vgic-mmio.c | 22 ++++++++++------------
>  1 file changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/arm/vgic/vgic-mmio.c b/xen/arch/arm/vgic/vgic-mmio.c
> index 4ad350c21c8b..b4ba34cfc368 100644
> --- a/xen/arch/arm/vgic/vgic-mmio.c
> +++ b/xen/arch/arm/vgic/vgic-mmio.c
> @@ -70,9 +70,8 @@ void vgic_mmio_write_senable(struct vcpu *vcpu,
>                               unsigned long val)
>  {
>      uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> -    unsigned int i;
>  
> -    bitmap_for_each ( i, &val, len * 8 )
> +    for_each_set_bit ( i, val )
>      {
>          struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>          unsigned long flags;
> @@ -115,9 +114,8 @@ void vgic_mmio_write_cenable(struct vcpu *vcpu,
>                               unsigned long val)
>  {
>      uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> -    unsigned int i;
>  
> -    bitmap_for_each ( i, &val, len * 8 )
> +    for_each_set_bit ( i, val )
>      {
>          struct vgic_irq *irq;
>          unsigned long flags;
> @@ -183,11 +181,10 @@ void vgic_mmio_write_spending(struct vcpu *vcpu,
>                                unsigned long val)
>  {
>      uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> -    unsigned int i;
>      unsigned long flags;
>      irq_desc_t *desc;
>  
> -    bitmap_for_each ( i, &val, len * 8 )
> +    for_each_set_bit ( i, val )
>      {
>          struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>  
> @@ -231,11 +228,10 @@ void vgic_mmio_write_cpending(struct vcpu *vcpu,
>                                unsigned long val)
>  {
>      uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> -    unsigned int i;
>      unsigned long flags;
>      irq_desc_t *desc;
>  
> -    bitmap_for_each ( i, &val, len * 8 )
> +    for_each_set_bit ( i, val )
>      {
>          struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>  
> @@ -327,9 +323,8 @@ void vgic_mmio_write_cactive(struct vcpu *vcpu,
>                               unsigned long val)
>  {
>      uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> -    unsigned int i;
>  
> -    bitmap_for_each ( i, &val, len * 8 )
> +    for_each_set_bit ( i, val )
>      {
>          struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>  
> @@ -357,9 +352,8 @@ void vgic_mmio_write_sactive(struct vcpu *vcpu,
>                               unsigned long val)
>  {
>      uint32_t intid = VGIC_ADDR_TO_INTID(addr, 1);
> -    unsigned int i;
>  
> -    bitmap_for_each ( i, &val, len * 8 )
> +    for_each_set_bit ( i, val )
>      {
>          struct vgic_irq *irq = vgic_get_irq(vcpu->domain, vcpu, intid + i);
>  
> @@ -591,6 +585,10 @@ static int dispatch_mmio_write(struct vcpu *vcpu, mmio_info_t *info,
>      if ( !region )
>          return 0;
>  
> +    /* Clamp data to the width of the access. */
> +    if ( len < sizeof(data) )
> +        data &= (1UL << (len * 8)) - 1;
> +
>      switch (iodev->iodev_type)
>      {
>      case IODEV_DIST:
> -- 
> 2.39.5
>