From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Matcher size is dynamic: it starts at the initial size, and then it
grows through rehash as more and more rules are added to the matcher.
When rules are deleted, the matcher's size is not decreased.

The rehash approach is greedy: if the matcher got to a certain size
at some point, chances are it will get to that size again, so it is
better to avoid costly rehash operations whenever possible.

However, when all the rules of a matcher are deleted, this should be
viewed as a special case: if the matcher actually got to the point
where it has zero rules, it may indicate that some use case from the
past is no longer happening, so some ICM can be freed.

This patch handles this case: when the number of rules in a matcher
drops to zero, the matcher's tables are shrunk back to the initial
size.
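
To make the policy concrete, here is a minimal standalone C sketch of
the sizing decisions described above (the struct, names, and threshold
below are illustrative only, not the driver's actual code):

/* Minimal standalone sketch of the sizing policy described above.
 * The struct and threshold are illustrative, not the driver's code.
 */
#include <stdbool.h>

#define INIT_SIZE_LOG 7		/* hypothetical initial size: 2^7 entries */

struct matcher_size_sketch {
	int num_of_rules;
	int size_log;		/* log2 of the current table size */
};

/* Grow-only rehash: adding rules past capacity triggers a rehash to
 * a larger table; deleting rules never shrinks it...
 */
static bool needs_grow(const struct matcher_size_sketch *s)
{
	return s->num_of_rules >= (1 << s->size_log);
}

/* ...except for the one special case this patch adds: a completely
 * empty matcher is reset to the initial size so its ICM can be freed.
 */
static bool needs_shrink(const struct matcher_size_sketch *s)
{
	return s->num_of_rules == 0 && s->size_log > INIT_SIZE_LOG;
}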
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
---
.../mellanox/mlx5/core/steering/hws/bwc.c | 68 ++++++++++++++++++-
1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
index 0a7903cf75e8..b7098c7d2112 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
@@ -3,6 +3,8 @@
 
 #include "internal.h"
 
+static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher);
+
 static u16 hws_bwc_gen_queue_idx(struct mlx5hws_context *ctx)
 {
 	/* assign random queue */
@@ -409,6 +411,70 @@ static void hws_bwc_rule_cnt_dec(struct mlx5hws_bwc_rule *bwc_rule)
 	atomic_dec(&bwc_matcher->tx_size.num_of_rules);
 }
 
+static int
+hws_bwc_matcher_rehash_shrink(struct mlx5hws_bwc_matcher *bwc_matcher)
+{
+	struct mlx5hws_bwc_matcher_size *rx_size = &bwc_matcher->rx_size;
+	struct mlx5hws_bwc_matcher_size *tx_size = &bwc_matcher->tx_size;
+
+	/* It is possible that another thread has added a rule.
+	 * Need to check again if we really need rehash/shrink.
+	 */
+	if (atomic_read(&rx_size->num_of_rules) ||
+	    atomic_read(&tx_size->num_of_rules))
+		return 0;
+
+	/* If the current matcher RX/TX size is already at its initial size. */
+	if (rx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG &&
+	    tx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG)
+		return 0;
+
+	/* Now we've done all the checking - do the shrinking:
+	 *  - reset match RTC size to the initial size
+	 *  - create new matcher
+	 *  - move the rules, which will not do anything as the matcher is empty
+	 *  - destroy the old matcher
+	 */
+
+	rx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG;
+	tx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG;
+
+	return hws_bwc_matcher_move(bwc_matcher);
+}
+
+static int hws_bwc_rule_cnt_dec_with_shrink(struct mlx5hws_bwc_rule *bwc_rule,
+					    u16 bwc_queue_idx)
+{
+	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
+	struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
+	struct mutex *queue_lock; /* Protect the queue */
+	int ret;
+
+	hws_bwc_rule_cnt_dec(bwc_rule);
+
+	if (atomic_read(&bwc_matcher->rx_size.num_of_rules) ||
+	    atomic_read(&bwc_matcher->tx_size.num_of_rules))
+		return 0;
+
+	/* Matcher has no more rules - shrink it to save ICM. */
+
+	queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx);
+	mutex_unlock(queue_lock);
+
+	hws_bwc_lock_all_queues(ctx);
+	ret = hws_bwc_matcher_rehash_shrink(bwc_matcher);
+	hws_bwc_unlock_all_queues(ctx);
+
+	mutex_lock(queue_lock);
+
+	if (unlikely(ret))
+		mlx5hws_err(ctx,
+			    "BWC rule deletion: shrinking empty matcher failed (%d)\n",
+			    ret);
+
+	return ret;
+}
+
 int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule)
 {
 	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
@@ -425,8 +491,8 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule)
 	mutex_lock(queue_lock);
 
 	ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr);
-	hws_bwc_rule_cnt_dec(bwc_rule);
 	hws_bwc_rule_list_remove(bwc_rule);
+	hws_bwc_rule_cnt_dec_with_shrink(bwc_rule, idx);
 
 	mutex_unlock(queue_lock);
--
2.34.1
On Sun, 22 Jun 2025 20:22:25 +0300 Mark Bloch wrote:
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
> index 0a7903cf75e8..b7098c7d2112 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
> @@ -3,6 +3,8 @@
> 
>  #include "internal.h"
> 
> +static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher);

Is there a circular dependency? Normally we recommend that people
reorder code rather than add forward declarations.

>  static u16 hws_bwc_gen_queue_idx(struct mlx5hws_context *ctx)
>  {
>  	/* assign random queue */
> @@ -409,6 +411,70 @@ static void hws_bwc_rule_cnt_dec(struct mlx5hws_bwc_rule *bwc_rule)
>  	atomic_dec(&bwc_matcher->tx_size.num_of_rules);
>  }
> 
> +static int
> +hws_bwc_matcher_rehash_shrink(struct mlx5hws_bwc_matcher *bwc_matcher)
> +{
> +	struct mlx5hws_bwc_matcher_size *rx_size = &bwc_matcher->rx_size;
> +	struct mlx5hws_bwc_matcher_size *tx_size = &bwc_matcher->tx_size;
> +
> +	/* It is possible that another thread has added a rule.
> +	 * Need to check again if we really need rehash/shrink.
> +	 */
> +	if (atomic_read(&rx_size->num_of_rules) ||
> +	    atomic_read(&tx_size->num_of_rules))
> +		return 0;
> +
> +	/* If the current matcher RX/TX size is already at its initial size. */
> +	if (rx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG &&
> +	    tx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG)
> +		return 0;
> +
> +	/* Now we've done all the checking - do the shrinking:
> +	 *  - reset match RTC size to the initial size
> +	 *  - create new matcher
> +	 *  - move the rules, which will not do anything as the matcher is empty
> +	 *  - destroy the old matcher
> +	 */
> +
> +	rx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG;
> +	tx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG;
> +
> +	return hws_bwc_matcher_move(bwc_matcher);
> +}
> +
> +static int hws_bwc_rule_cnt_dec_with_shrink(struct mlx5hws_bwc_rule *bwc_rule,
> +					    u16 bwc_queue_idx)
> +{
> +	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
> +	struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
> +	struct mutex *queue_lock; /* Protect the queue */
> +	int ret;
> +
> +	hws_bwc_rule_cnt_dec(bwc_rule);
> +
> +	if (atomic_read(&bwc_matcher->rx_size.num_of_rules) ||
> +	    atomic_read(&bwc_matcher->tx_size.num_of_rules))
> +		return 0;
> +
> +	/* Matcher has no more rules - shrink it to save ICM. */
> +
> +	queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx);
> +	mutex_unlock(queue_lock);
> +
> +	hws_bwc_lock_all_queues(ctx);
> +	ret = hws_bwc_matcher_rehash_shrink(bwc_matcher);
> +	hws_bwc_unlock_all_queues(ctx);
> +
> +	mutex_lock(queue_lock);

Dropping and re-taking caller-held locks is a bad code smell.
Please refactor - presumably you want some portion of the condition
to be under the lock with the dec? Return true / false based on that;
let the caller drop the lock and do the shrink if true was returned
(directly or with another helper).

> +	if (unlikely(ret))
> +		mlx5hws_err(ctx,
> +			    "BWC rule deletion: shrinking empty matcher failed (%d)\n",
> +			    ret);
> +
> +	return ret;
> +}
> +
>  int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule)
>  {
>  	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
> @@ -425,8 +491,8 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule)
>  	mutex_lock(queue_lock);
> 
>  	ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr);
> -	hws_bwc_rule_cnt_dec(bwc_rule);
>  	hws_bwc_rule_list_remove(bwc_rule);
> +	hws_bwc_rule_cnt_dec_with_shrink(bwc_rule, idx);
> 
>  	mutex_unlock(queue_lock);
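
A minimal sketch of the shape being suggested here (hypothetical
helper name and a caller-side fragment, not actual mlx5hws code): the
decrement helper only reports, under the caller-held queue lock,
whether the matcher became empty, and the caller then drops its own
lock and performs the shrink. This keeps every lock/unlock transition
visible in a single function.

/* Sketch of the suggested refactor - hypothetical names, not the
 * actual driver code. The helper decides under the queue lock; the
 * caller owns all locking transitions.
 */

/* Returns true if this deletion emptied the matcher (checked while
 * the per-queue lock is still held by the caller).
 */
static bool hws_bwc_rule_cnt_dec_check_empty(struct mlx5hws_bwc_rule *bwc_rule)
{
	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;

	hws_bwc_rule_cnt_dec(bwc_rule);

	return !atomic_read(&bwc_matcher->rx_size.num_of_rules) &&
	       !atomic_read(&bwc_matcher->tx_size.num_of_rules);
}

/* Caller side (e.g. in mlx5hws_bwc_rule_destroy_simple), after the
 * other work done under queue_lock:
 */
	empty = hws_bwc_rule_cnt_dec_check_empty(bwc_rule);
	mutex_unlock(queue_lock);

	if (empty) {
		hws_bwc_lock_all_queues(ctx);
		ret = hws_bwc_matcher_rehash_shrink(bwc_matcher);
		hws_bwc_unlock_all_queues(ctx);
	}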
On 25-Jun-25 03:08, Jakub Kicinski wrote:
> On Sun, 22 Jun 2025 20:22:25 +0300 Mark Bloch wrote:
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
>> index 0a7903cf75e8..b7098c7d2112 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
>> @@ -3,6 +3,8 @@
>> 
>>  #include "internal.h"
>> 
>> +static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher);
> 
> Is there a circular dependency? Normally we recommend that people
> reorder code rather than add forward declarations.

Sure, I can rearrange the code. It would, however, mean moving a lot
of code... I think I'll do it in a separate refactoring patch before
this functional one.

>> +static int hws_bwc_rule_cnt_dec_with_shrink(struct mlx5hws_bwc_rule *bwc_rule,
>> +					    u16 bwc_queue_idx)
>> +{
>> +	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
>> +	struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
>> +	struct mutex *queue_lock; /* Protect the queue */
>> +	int ret;
>> +
>> +	hws_bwc_rule_cnt_dec(bwc_rule);
>> +
>> +	if (atomic_read(&bwc_matcher->rx_size.num_of_rules) ||
>> +	    atomic_read(&bwc_matcher->tx_size.num_of_rules))
>> +		return 0;
>> +
>> +	/* Matcher has no more rules - shrink it to save ICM. */
>> +
>> +	queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx);
>> +	mutex_unlock(queue_lock);
>> +
>> +	hws_bwc_lock_all_queues(ctx);
>> +	ret = hws_bwc_matcher_rehash_shrink(bwc_matcher);
>> +	hws_bwc_unlock_all_queues(ctx);
>> +
>> +	mutex_lock(queue_lock);
> 
> Dropping and re-taking caller-held locks is a bad code smell.
> Please refactor - presumably you want some portion of the condition
> to be under the lock with the dec? Return true / false based on that;
> let the caller drop the lock and do the shrink if true was returned
> (directly or with another helper).

There are multiple queues that can function in parallel. Each rule
selects a random queue and immediately locks it. All further
processing of this rule is done while this lock is held.

Sometimes there is a need to do an operation that requires full
ownership of the matcher; that is, this rule has to be the only rule
that is being processed. In such a case, all the locks should be
acquired, which means that we're facing the 'dining philosophers'
scenario. All the locks should be acquired in the same order: the
first lock is freed, and then all the locks are acquired in an
orderly manner.

Having all this logic in the same function that acquires the first
lock would mean really complicating the code and breaking the simple
logical flow of the functions.

Thanks for the review!
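
For illustration, the ordered acquisition described above looks
roughly like this (a standalone pthread sketch under assumed names;
not the actual hws_bwc_lock_all_queues() implementation):

/* Sketch of ordered lock acquisition avoiding the dining-philosophers
 * deadlock: every "lock all" path takes the queue locks in the same
 * ascending index order, so two threads can never each hold one lock
 * while waiting for the other's.
 */
#include <pthread.h>

#define NUM_QUEUES 16

static pthread_mutex_t queue_locks[NUM_QUEUES];

static void lock_all_queues(void)
{
	int i;

	for (i = 0; i < NUM_QUEUES; i++)
		pthread_mutex_lock(&queue_locks[i]);
}

static void unlock_all_queues(void)
{
	int i;

	for (i = NUM_QUEUES - 1; i >= 0; i--)
		pthread_mutex_unlock(&queue_locks[i]);
}

For the scheme to be deadlock-free, a thread holding one queue lock
must release it before calling lock_all_queues(), which is exactly the
unlock/lock-all/relock dance the patch performs.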