find_first_set_bit() is a Xen-ism which has undefined behaviour with a 0
input. ffsl(), by contrast, is well defined with an input of 0, and is
found outside of Xen too.
_init_heap_pages() is the one special case here, comparing the LSB of two
different addresses. The -1 cancels off both sides of the expression.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Oleksii Kurochko <oleksii.kurochko@gmail.com>
CC: Shawn Anastasio <sanastasio@raptorengineering.com>
CC: consulting@bugseng.com <consulting@bugseng.com>
CC: Simone Ballarin <simone.ballarin@bugseng.com>
CC: Federico Serafini <federico.serafini@bugseng.com>
CC: Nicola Vetrini <nicola.vetrini@bugseng.com>
v2:
* Reorder from later in the series to keep ARM bisectable
In an x86 build, we get the following delta:
add/remove: 0/0 grow/shrink: 2/4 up/down: 39/-52 (-13)
Function old new delta
hpet_write 2183 2206 +23
init_heap_pages 1222 1238 +16
dom0_construct_pvh 3959 3958 -1
mapping_order 139 126 -13
guest_physmap_mark_populate_on_demand 1301 1285 -16
vcpumask_to_pcpumask 525 503 -22
so the optimiser improvements for ffsl() really do speak for themselves.
I'm surprised by the increase in hpet_write(), but looking at the code, it
very clearly wants the same treatment as:
commit 188fa82305e72b725473db9146e20cc9abf7bff3
Author: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Fri Mar 15 11:31:33 2024
xen/vpci: Improve code generation in mask_write()
which I'm confident will end up as a net improvement.
---
xen/arch/x86/guest/xen/xen.c | 4 ++--
xen/arch/x86/hvm/dom0_build.c | 2 +-
xen/arch/x86/hvm/hpet.c | 8 ++++----
xen/arch/x86/include/asm/pt-contig-markers.h | 2 +-
xen/arch/x86/mm.c | 2 +-
xen/arch/x86/mm/p2m-pod.c | 4 ++--
xen/common/page_alloc.c | 2 +-
xen/common/softirq.c | 2 +-
xen/drivers/passthrough/amd/iommu_map.c | 2 +-
xen/drivers/passthrough/iommu.c | 4 ++--
xen/drivers/passthrough/x86/iommu.c | 4 ++--
11 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c
index d9768cc9527d..7484b3f73ad3 100644
--- a/xen/arch/x86/guest/xen/xen.c
+++ b/xen/arch/x86/guest/xen/xen.c
@@ -168,14 +168,14 @@ static void cf_check xen_evtchn_upcall(void)
while ( pending )
{
- unsigned int l1 = find_first_set_bit(pending);
+ unsigned int l1 = ffsl(pending) - 1;
unsigned long evtchn = xchg(&XEN_shared_info->evtchn_pending[l1], 0);
__clear_bit(l1, &pending);
evtchn &= ~XEN_shared_info->evtchn_mask[l1];
while ( evtchn )
{
- unsigned int port = find_first_set_bit(evtchn);
+ unsigned int port = ffsl(evtchn) - 1;
__clear_bit(port, &evtchn);
port += l1 * BITS_PER_LONG;
diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index b0cb96c3bc76..68c08bbe94f7 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -139,7 +139,7 @@ static int __init pvh_populate_memory_range(struct domain *d,
order = get_order_from_pages(end - start + 1);
order = min(order ? order - 1 : 0, max_order);
/* The order allocated and populated must be aligned to the address. */
- order = min(order, start ? find_first_set_bit(start) : MAX_ORDER);
+ order = min(order, start ? ffsl(start) - 1 : MAX_ORDER);
page = alloc_domheap_pages(d, order, dom0_memflags | MEMF_no_scrub);
if ( page == NULL )
{
diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c
index 12b00b770257..37e765e97df9 100644
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -335,7 +335,7 @@ static void timer_sanitize_int_route(HPETState *h, unsigned int tn)
* enabled pick the first irq.
*/
timer_config(h, tn) |=
- MASK_INSR(find_first_set_bit(timer_int_route_cap(h, tn)),
+ MASK_INSR(ffsl(timer_int_route_cap(h, tn)) - 1,
HPET_TN_ROUTE);
}
@@ -409,7 +409,7 @@ static int cf_check hpet_write(
{
bool active;
- i = find_first_set_bit(new_val);
+ i = ffsl(new_val) - 1;
if ( i >= HPET_TIMER_NUM )
break;
__clear_bit(i, &new_val);
@@ -535,14 +535,14 @@ static int cf_check hpet_write(
/* stop/start timers whos state was changed by this write. */
while (stop_timers)
{
- i = find_first_set_bit(stop_timers);
+ i = ffsl(stop_timers) - 1;
__clear_bit(i, &stop_timers);
hpet_stop_timer(h, i, guest_time);
}
while (start_timers)
{
- i = find_first_set_bit(start_timers);
+ i = ffsl(start_timers) - 1;
__clear_bit(i, &start_timers);
hpet_set_timer(h, i, guest_time);
}
diff --git a/xen/arch/x86/include/asm/pt-contig-markers.h b/xen/arch/x86/include/asm/pt-contig-markers.h
index b3c1fe803534..e8c8157d605f 100644
--- a/xen/arch/x86/include/asm/pt-contig-markers.h
+++ b/xen/arch/x86/include/asm/pt-contig-markers.h
@@ -60,7 +60,7 @@ static bool pt_update_contig_markers(uint64_t *pt, unsigned int idx,
/* Step 1: Reduce markers in lower numbered entries. */
while ( i )
{
- b = find_first_set_bit(i);
+ b = ffsl(i) - 1;
i &= ~(1U << b);
if ( GET_MARKER(pt[i]) <= b )
break;
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d968bbbc7315..2a84bdae670b 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3424,7 +3424,7 @@ static int vcpumask_to_pcpumask(
{
unsigned int cpu;
- vcpu_id = find_first_set_bit(vmask);
+ vcpu_id = ffsl(vmask) - 1;
vmask &= ~(1UL << vcpu_id);
vcpu_id += vcpu_bias;
if ( (vcpu_id >= d->max_vcpus) )
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 1c727c78c833..bd84fe9e27ee 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -684,7 +684,7 @@ unsigned long
p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn, unsigned int order)
{
unsigned long left = 1UL << order, ret = 0;
- unsigned int chunk_order = find_first_set_bit(gfn_x(gfn) | left);
+ unsigned int chunk_order = ffsl(gfn_x(gfn) | left) - 1;
do {
ret += decrease_reservation(d, gfn, chunk_order);
@@ -1393,7 +1393,7 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
unsigned int order)
{
unsigned long left = 1UL << order;
- unsigned int chunk_order = find_first_set_bit(gfn | left);
+ unsigned int chunk_order = ffsl(gfn | left) - 1;
int rc;
if ( !paging_mode_translate(d) )
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 8d3342e95236..054b7edb3989 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1819,7 +1819,7 @@ static void _init_heap_pages(const struct page_info *pg,
if ( unlikely(!avail[nid]) )
{
bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) &&
- (find_first_set_bit(e) <= find_first_set_bit(s));
+ (ffsl(e) <= ffsl(s));
unsigned long n;
n = init_node_heap(nid, s, nr_pages, &use_tail);
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
index 321d26902d37..bee4a82009c3 100644
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -48,7 +48,7 @@ static void __do_softirq(unsigned long ignore_mask)
|| cpu_is_offline(cpu) )
break;
- i = find_first_set_bit(pending);
+ i = ffsl(pending) - 1;
clear_bit(i, &softirq_pending(cpu));
(*softirq_handlers[i])();
}
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index e0f4fe736a8d..f1061bfc798c 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -137,7 +137,7 @@ static void set_iommu_ptes_present(unsigned long pt_mfn,
ASSERT(!pde->u);
if ( pde > table )
- ASSERT(pde->ign0 == find_first_set_bit(pde - table));
+ ASSERT(pde->ign0 == ffsl(pde - table) - 1);
else
ASSERT(pde->ign0 == CONTIG_LEVEL_SHIFT);
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index ba18136c461c..50bfd62553ae 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -305,7 +305,7 @@ static unsigned int mapping_order(const struct domain_iommu *hd,
{
unsigned long res = dfn_x(dfn) | mfn_x(mfn);
unsigned long sizes = hd->platform_ops->page_sizes;
- unsigned int bit = find_first_set_bit(sizes), order = 0;
+ unsigned int bit = ffsl(sizes) - 1, order = 0;
ASSERT(bit == PAGE_SHIFT);
@@ -313,7 +313,7 @@ static unsigned int mapping_order(const struct domain_iommu *hd,
{
unsigned long mask;
- bit = find_first_set_bit(sizes);
+ bit = ffsl(sizes) - 1;
mask = (1UL << bit) - 1;
if ( nr <= mask || (res & mask) )
break;
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index 304a2f5480c7..cc0062b02712 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -641,7 +641,7 @@ struct page_info *iommu_alloc_pgtable(struct domain_iommu *hd,
if ( contig_mask )
{
/* See pt-contig-markers.h for a description of the marker scheme. */
- unsigned int i, shift = find_first_set_bit(contig_mask);
+ unsigned int i, shift = ffsl(contig_mask) - 1;
ASSERT((CONTIG_LEVEL_SHIFT & (contig_mask >> shift)) == CONTIG_LEVEL_SHIFT);
@@ -652,7 +652,7 @@ struct page_info *iommu_alloc_pgtable(struct domain_iommu *hd,
for ( i = 4; i < PAGE_SIZE / sizeof(*p); i += 4 )
{
- p[i + 0] = (find_first_set_bit(i) + 0ULL) << shift;
+ p[i + 0] = (ffsl(i) - 1ULL) << shift;
p[i + 1] = 0;
p[i + 2] = 1ULL << shift;
p[i + 3] = 0;
--
2.30.2
On 24.05.2024 22:03, Andrew Cooper wrote: > --- a/xen/arch/x86/hvm/hpet.c > +++ b/xen/arch/x86/hvm/hpet.c > @@ -335,7 +335,7 @@ static void timer_sanitize_int_route(HPETState *h, unsigned int tn) > * enabled pick the first irq. > */ > timer_config(h, tn) |= > - MASK_INSR(find_first_set_bit(timer_int_route_cap(h, tn)), > + MASK_INSR(ffsl(timer_int_route_cap(h, tn)) - 1, > HPET_TN_ROUTE); > } This can be just ffs(). > @@ -409,7 +409,7 @@ static int cf_check hpet_write( > { > bool active; > > - i = find_first_set_bit(new_val); > + i = ffsl(new_val) - 1; > if ( i >= HPET_TIMER_NUM ) > break; This in principle can be, too, but would require a little further care. > @@ -535,14 +535,14 @@ static int cf_check hpet_write( > /* stop/start timers whos state was changed by this write. */ > while (stop_timers) > { > - i = find_first_set_bit(stop_timers); > + i = ffsl(stop_timers) - 1; > __clear_bit(i, &stop_timers); > hpet_stop_timer(h, i, guest_time); > } > > while (start_timers) > { > - i = find_first_set_bit(start_timers); > + i = ffsl(start_timers) - 1; > __clear_bit(i, &start_timers); > hpet_set_timer(h, i, guest_time); > } Same here; in fact {start,stop}_timers are needlessly unsigned long in the first place. > --- a/xen/arch/x86/include/asm/pt-contig-markers.h > +++ b/xen/arch/x86/include/asm/pt-contig-markers.h > @@ -60,7 +60,7 @@ static bool pt_update_contig_markers(uint64_t *pt, unsigned int idx, > /* Step 1: Reduce markers in lower numbered entries. */ > while ( i ) > { > - b = find_first_set_bit(i); > + b = ffsl(i) - 1; > i &= ~(1U << b); Considering i's type and the immediately following expression, this again can easily be just ffs(). 
> --- a/xen/drivers/passthrough/amd/iommu_map.c > +++ b/xen/drivers/passthrough/amd/iommu_map.c > @@ -137,7 +137,7 @@ static void set_iommu_ptes_present(unsigned long pt_mfn, > ASSERT(!pde->u); > > if ( pde > table ) > - ASSERT(pde->ign0 == find_first_set_bit(pde - table)); > + ASSERT(pde->ign0 == ffsl(pde - table) - 1); pde pointing into the page starting at table, this can be ffs(), too. Preferably with at least the easy adjustments done: Reviewed-by: Jan Beulich <jbeulich@suse.com> Jan
© 2016 - 2024 Red Hat, Inc.