[PATCH] ipmi: Close the race between __scan_channels() and deliver_response()

Jinhui Guo posted 1 patch 2 days, 13 hours ago
There is a newer version of this series
drivers/char/ipmi/ipmi_msghandler.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
[PATCH] ipmi: Close the race between __scan_channels() and deliver_response()
Posted by Jinhui Guo 2 days, 13 hours ago
The command "ipmi -b -t" would occasionally fail:
  #ipmitool -b 6 -t 0x2c raw 0x6 0x01
  Unable to send command: Invalid argument
  Unable to send RAW command (channel=0x6 netfn=0x6 lun=0x0 cmd=0x1)

The race window between __scan_channels() and deliver_response() causes
the parameters of some channels to be set to 0.

1.[CPUA] After ipmi_add_smi() calling __bmc_get_device_id() ->
         __scan_channels(), the intf->channels_ready is set to true and
	 is never cleared by any function. ipmi_add_smi() then invokes
	 __scan_channels(), which issues an IPMI request and waits with
	 wait_event() until all channels have been scanned. wait_event()
         internally calls might_sleep(), which might yield the CPU.
         (wait_event() could also be interrupted by an interrupt, causing
	 the task to yield the CPU.)
2.[CPUB] deliver_response() is invoked when the CPU receives the IPMI
         response. After processing a IPMI response, deliver_response()
         directly assigns intf->wchannels to intf->channel_list and sets
	 intf->channels_ready to true. However, not all channels are actually
	 ready for use.
3.[CPUA] Since intf->channels_ready is already true, wait_event() never
         enters __wait_event(). __scan_channels() immediately clears
	 intf->null_user_handler and exits.
4.[CPUB] Once intf->null_user_handler is set to NULL, deliver_response()
         ignores further IPMI responses, leaving the remaining channels
	 zero-initialized and unusable.

CPUA                             CPUB
-------------------------------  -----------------------------
ipmi_add_smi()
 __scan_channels()
  intf->null_user_handler
        = channel_handler;
  send_channel_info_cmd(intf,
        0);
  wait_event(intf->waitq,
	intf->channels_ready);
   do {
    might_sleep();
                                 deliver_response()
                                  channel_handler()
                                   intf->channel_list =
				         intf->wchannels + set;
                                   intf->channels_ready = true;
                                   send_channel_info_cmd(intf,
				          intf->curr_channel);
    if (condition)
     break;
    __wait_event(wq_head,
	    condition);
   } while(0)
  intf->null_user_handler
        = NULL;
                                 deliver_response()
                                  if (!msg->user)
                                   if (intf->null_user_handler)
                                    rv = -EINVAL;
                                  return rv;
-------------------------------  -----------------------------

Fix the race between __scan_channels() and deliver_response() with the
following changes.

1. Drop the redundant __scan_channels() call in ipmi_add_smi(), the
   function is already invoked via ipmi_add_smi() -> __bmc_get_device_id()
   -> __scan_channels().
2. channel_handler() sets intf->channels_ready to true but no one clears
   it, preventing __scan_channels() from rescanning channels. Clear
   intf->channels_ready to false in channel_handler() before starting
   the channel scan.
3. Only assign intf->channel_list = intf->wchannels and set
   intf->channels_ready = true in channel_handler() after all channels
   have been successfully scanned or after failing to send the IPMI
   request.

Signed-off-by: Jinhui Guo <guojinhui.liam@bytedance.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 8e9050f99e9e..73dab3b21221 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3405,11 +3405,8 @@ channel_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
 			intf->channel_list = intf->wchannels + set;
 			intf->channels_ready = true;
 			wake_up(&intf->waitq);
-		} else {
-			intf->channel_list = intf->wchannels + set;
-			intf->channels_ready = true;
+		} else
 			rv = send_channel_info_cmd(intf, intf->curr_channel);
-		}
 
 		if (rv) {
 			/* Got an error somehow, just give up. */
@@ -3433,6 +3430,9 @@ static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id)
 {
 	int rv;
 
+	/* Clear channels_ready to force channels rescan. */
+	intf->channels_ready = false;
+
 	if (ipmi_version_major(id) > 1
 			|| (ipmi_version_major(id) == 1
 			    && ipmi_version_minor(id) >= 5)) {
@@ -3633,12 +3633,6 @@ int ipmi_add_smi(struct module         *owner,
 		goto out_err_started;
 	}
 
-	mutex_lock(&intf->bmc_reg_mutex);
-	rv = __scan_channels(intf, &id);
-	mutex_unlock(&intf->bmc_reg_mutex);
-	if (rv)
-		goto out_err_bmc_reg;
-
 	intf->nr_users_devattr = dev_attr_nr_users;
 	sysfs_attr_init(&intf->nr_users_devattr.attr);
 	rv = device_create_file(intf->si_dev, &intf->nr_users_devattr);
-- 
2.20.1
Re: [PATCH] ipmi: Close the race between __scan_channels() and deliver_response()
Posted by Corey Minyard 2 days, 3 hours ago
On Mon, Sep 29, 2025 at 04:16:02PM +0800, Jinhui Guo wrote:
> The command "ipmi -b -t" would occasionally fail:
>   #ipmitool -b 6 -t 0x2c raw 0x6 0x01
>   Unable to send command: Invalid argument
>   Unable to send RAW command (channel=0x6 netfn=0x6 lun=0x0 cmd=0x1)
> 
> The race window between __scan_channels() and deliver_response() causes
> the parameters of some channels to be set to 0.
> 
> 1.[CPUA] After ipmi_add_smi() calling __bmc_get_device_id() ->
>          __scan_channels(), the intf->channels_ready is set to true and
> 	 is never cleared by any function. ipmi_add_smi() then invokes
> 	 __scan_channels(), which issues an IPMI request and waits with
> 	 wait_event() until all channels have been scanned. wait_event()
>          internally calls might_sleep(), which might yield the CPU.
>          (wait_event() could also be interrupted by an interrupt, causing
> 	 the task to yield the CPU.)
> 2.[CPUB] deliver_response() is invoked when the CPU receives the IPMI
>          response. After processing a IPMI response, deliver_response()
>          directly assigns intf->wchannels to intf->channel_list and sets
> 	 intf->channels_ready to true. However, not all channels are actually
> 	 ready for use.
> 3.[CPUA] Since intf->channels_ready is already true, wait_event() never
>          enters __wait_event(). __scan_channels() immediately clears
> 	 intf->null_user_handler and exits.
> 4.[CPUB] Once intf->null_user_handler is set to NULL, deliver_response()
>          ignores further IPMI responses, leaving the remaining channels
> 	 zero-initialized and unusable.
> 
> CPUA                             CPUB
> -------------------------------  -----------------------------
> ipmi_add_smi()
>  __scan_channels()
>   intf->null_user_handler
>         = channel_handler;
>   send_channel_info_cmd(intf,
>         0);
>   wait_event(intf->waitq,
> 	intf->channels_ready);
>    do {
>     might_sleep();
>                                  deliver_response()
>                                   channel_handler()
>                                    intf->channel_list =
> 				         intf->wchannels + set;
>                                    intf->channels_ready = true;
>                                    send_channel_info_cmd(intf,
> 				          intf->curr_channel);
>     if (condition)
>      break;
>     __wait_event(wq_head,
> 	    condition);
>    } while(0)
>   intf->null_user_handler
>         = NULL;
>                                  deliver_response()
>                                   if (!msg->user)
>                                    if (intf->null_user_handler)
>                                     rv = -EINVAL;
>                                   return rv;
> -------------------------------  -----------------------------
> 
> Fix the race between __scan_channels() and deliver_response() with the
> following changes.
> 
> 1. Drop the redundant __scan_channels() call in ipmi_add_smi(), the
>    function is already invoked via ipmi_add_smi() -> __bmc_get_device_id()
>    -> __scan_channels().

It's only called conditionally in __bmc_get_device_id(), what if it's
not called there?

> 2. channel_handler() sets intf->channels_ready to true but no one clears
>    it, preventing __scan_channels() from rescanning channels. Clear
>    intf->channels_ready to false in channel_handler() before starting
>    the channel scan.

This is mostly the intent.  The channels shouldn't change unless the BMC software changes.

The only exception would be after the comment:
    /* Version info changes, scan the channels again. */
    __scan_channels(intf, &bmc->fetch_id);

In that case, since the BMC has changed, you want to rescan the channels.
So you would want to reset that flag there.

> 3. Only assign intf->channel_list = intf->wchannels and set
>    intf->channels_ready = true in channel_handler() after all channels
>    have been successfully scanned or after failing to send the IPMI
>    request.

Yeah, that's a bug.

Normal style is to leave the {} in if any part of the if statement has
braces.

Also, the above three are independent problems; they should be separate
patches.

Thanks,

-corey

> 
> Signed-off-by: Jinhui Guo <guojinhui.liam@bytedance.com>
> ---
>  drivers/char/ipmi/ipmi_msghandler.c | 14 ++++----------
>  1 file changed, 4 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
> index 8e9050f99e9e..73dab3b21221 100644
> --- a/drivers/char/ipmi/ipmi_msghandler.c
> +++ b/drivers/char/ipmi/ipmi_msghandler.c
> @@ -3405,11 +3405,8 @@ channel_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
>  			intf->channel_list = intf->wchannels + set;
>  			intf->channels_ready = true;
>  			wake_up(&intf->waitq);
> -		} else {
> -			intf->channel_list = intf->wchannels + set;
> -			intf->channels_ready = true;
> +		} else
>  			rv = send_channel_info_cmd(intf, intf->curr_channel);
> -		}
>  
>  		if (rv) {
>  			/* Got an error somehow, just give up. */
> @@ -3433,6 +3430,9 @@ static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id)
>  {
>  	int rv;
>  
> +	/* Clear channels_ready to force channels rescan. */
> +	intf->channels_ready = false;
> +
>  	if (ipmi_version_major(id) > 1
>  			|| (ipmi_version_major(id) == 1
>  			    && ipmi_version_minor(id) >= 5)) {
> @@ -3633,12 +3633,6 @@ int ipmi_add_smi(struct module         *owner,
>  		goto out_err_started;
>  	}
>  
> -	mutex_lock(&intf->bmc_reg_mutex);
> -	rv = __scan_channels(intf, &id);
> -	mutex_unlock(&intf->bmc_reg_mutex);
> -	if (rv)
> -		goto out_err_bmc_reg;
> -
>  	intf->nr_users_devattr = dev_attr_nr_users;
>  	sysfs_attr_init(&intf->nr_users_devattr.attr);
>  	rv = device_create_file(intf->si_dev, &intf->nr_users_devattr);
> -- 
> 2.20.1
>