[PATCH 1/3] bus: mhi: host: keep bhi buffer through suspend cycle

Muhammad Usama Anjum posted 3 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH 1/3] bus: mhi: host: keep bhi buffer through suspend cycle
Posted by Muhammad Usama Anjum 3 months, 1 week ago
Under memory pressure, dma_alloc_coherent() can fail at resume time.
The firmware load then fails and, as a result, the driver crashes:

kernel: kworker/u33:5: page allocation failure: order:7, mode:0xc04(GFP_NOIO|GFP_DMA32), nodemask=(null),cpuset=/,mems_allowed=0
kernel: CPU: 1 UID: 0 PID: 7693 Comm: kworker/u33:5 Not tainted 6.11.11-valve17-1-neptune-611-g027868a0ac03 #1 3843143b92e9da0fa2d3d5f21f51beaed15c7d59
kernel: Hardware name: Valve Galileo/Galileo, BIOS F7G0112 08/01/2024
kernel: Workqueue: mhi_hiprio_wq mhi_pm_st_worker [mhi]
kernel: Call Trace:
kernel:  <TASK>
kernel:  dump_stack_lvl+0x4e/0x70
kernel:  warn_alloc+0x164/0x190
kernel:  ? srso_return_thunk+0x5/0x5f
kernel:  ? __alloc_pages_direct_compact+0xaf/0x360
kernel:  __alloc_pages_slowpath.constprop.0+0xc75/0xd70
kernel:  __alloc_pages_noprof+0x321/0x350
kernel:  __dma_direct_alloc_pages.isra.0+0x14a/0x290
kernel:  dma_direct_alloc+0x70/0x270
kernel:  mhi_fw_load_handler+0x126/0x340 [mhi a96cb91daba500cc77f86bad60c1f332dc3babdf]
kernel:  mhi_pm_st_worker+0x5e8/0xac0 [mhi a96cb91daba500cc77f86bad60c1f332dc3babdf]
kernel:  ? srso_return_thunk+0x5/0x5f
kernel:  process_one_work+0x17e/0x330
kernel:  worker_thread+0x2ce/0x3f0
kernel:  ? __pfx_worker_thread+0x10/0x10
kernel:  kthread+0xd2/0x100
kernel:  ? __pfx_kthread+0x10/0x10
kernel:  ret_from_fork+0x34/0x50
kernel:  ? __pfx_kthread+0x10/0x10
kernel:  ret_from_fork_asm+0x1a/0x30
kernel:  </TASK>
kernel: Mem-Info:
kernel: active_anon:513809 inactive_anon:152 isolated_anon:0
    active_file:359315 inactive_file:2487001 isolated_file:0
    unevictable:637 dirty:19 writeback:0
    slab_reclaimable:160391 slab_unreclaimable:39729
    mapped:175836 shmem:51039 pagetables:4415
    sec_pagetables:0 bounce:0
    kernel_misc_reclaimable:0
    free:125666 free_pcp:0 free_cma:0

In the above example, the consumed memory sums to about 15.5GB, and
roughly 500MB of the 16GB of RAM is free. So memory is available, but
the DMA-capable memory is either exhausted or too fragmented to
satisfy the order-7 allocation.

Fix it by allocating the BHI buffer only once and reusing it for
subsequent firmware loads. The buffer then stays allocated until it is
freed in mhi_unprepare_after_power_down().

Tested-on: WCN6855 WLAN.HSP.1.1-03926.13-QCAHSPSWPL_V2_SILICONZ_CE-2.52297.6

Fixes: cd457afb1667 ("bus: mhi: core: Add support for downloading firmware over BHIe")
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
---
Reported here:
https://lore.kernel.org/all/ead32f5b-730a-4b81-b38f-93d822f990c6@collabora.com

A lot more fixes are still required, hence I'm not adding a Closes: tag.
---
 drivers/bus/mhi/host/boot.c     | 19 ++++++++++---------
 drivers/bus/mhi/host/init.c     |  5 +++++
 drivers/bus/mhi/host/internal.h |  2 ++
 include/linux/mhi.h             |  1 +
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
index b3a85aa3c4768..11bb8c12ac597 100644
--- a/drivers/bus/mhi/host/boot.c
+++ b/drivers/bus/mhi/host/boot.c
@@ -302,8 +302,8 @@ static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
 	return -EIO;
 }
 
-static void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
-				struct image_info *image_info)
+void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
+			 struct image_info *image_info)
 {
 	struct mhi_buf *mhi_buf = image_info->mhi_buf;
 
@@ -455,18 +455,19 @@ static enum mhi_fw_load_type mhi_fw_load_type_get(const struct mhi_controller *m
 
 static int mhi_load_image_bhi(struct mhi_controller *mhi_cntrl, const u8 *fw_data, size_t size)
 {
-	struct image_info *image;
+	struct image_info *image = mhi_cntrl->bhi_image;
 	int ret;
 
-	ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
-	if (ret)
-		return ret;
+	if (!image) {
+		ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
+		if (ret)
+			return ret;
 
-	/* Load the firmware into BHI vec table */
-	memcpy(image->mhi_buf->buf, fw_data, size);
+		/* Load the firmware into BHI vec table */
+		memcpy(image->mhi_buf->buf, fw_data, size);
+	}
 
 	ret = mhi_fw_load_bhi(mhi_cntrl, &image->mhi_buf[image->entries - 1]);
-	mhi_free_bhi_buffer(mhi_cntrl, image);
 
 	return ret;
 }
diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c
index 6e06e4efec765..2e0f18c939e68 100644
--- a/drivers/bus/mhi/host/init.c
+++ b/drivers/bus/mhi/host/init.c
@@ -1228,6 +1228,11 @@ void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl)
 		mhi_cntrl->rddm_image = NULL;
 	}
 
+	if (mhi_cntrl->bhi_image) {
+		mhi_free_bhi_buffer(mhi_cntrl, mhi_cntrl->bhi_image);
+		mhi_cntrl->bhi_image = NULL;
+	}
+
 	mhi_deinit_dev_ctxt(mhi_cntrl);
 }
 EXPORT_SYMBOL_GPL(mhi_unprepare_after_power_down);
diff --git a/drivers/bus/mhi/host/internal.h b/drivers/bus/mhi/host/internal.h
index 1054e67bb450d..60b0699323375 100644
--- a/drivers/bus/mhi/host/internal.h
+++ b/drivers/bus/mhi/host/internal.h
@@ -324,6 +324,8 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
 			 struct image_info **image_info, size_t alloc_size);
 void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
 			 struct image_info *image_info);
+void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
+			 struct image_info *image_info);
 
 /* Power management APIs */
 enum mhi_pm_state __must_check mhi_tryset_pm_state(
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 4c567907933a5..593012f779d97 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -391,6 +391,7 @@ struct mhi_controller {
 	size_t reg_len;
 	struct image_info *fbc_image;
 	struct image_info *rddm_image;
+	struct image_info *bhi_image;
 	struct mhi_chan *mhi_chan;
 	struct list_head lpm_chans;
 	int *irq;
-- 
2.39.5
Re: [PATCH 1/3] bus: mhi: host: keep bhi buffer through suspend cycle
Posted by Krishna Chaitanya Chundru 3 months ago

On 6/30/2025 1:13 PM, Muhammad Usama Anjum wrote:
> When there is memory pressure, at resume time dma_alloc_coherent()
> returns error which in turn fails the loading of firmware and hence
> the driver crashes:
> 
Why only BHI? BHIe can have the same issue.
> kernel: kworker/u33:5: page allocation failure: order:7,
> mode:0xc04(GFP_NOIO|GFP_DMA32), nodemask=(null),cpuset=/,mems_allowed=0
> kernel: CPU: 1 UID: 0 PID: 7693 Comm: kworker/u33:5 Not tainted
> 6.11.11-valve17-1-neptune-611-g027868a0ac03 #1
> 3843143b92e9da0fa2d3d5f21f51beaed15c7d59
> kernel: Hardware name: Valve Galileo/Galileo, BIOS F7G0112 08/01/2024
> kernel: Workqueue: mhi_hiprio_wq mhi_pm_st_worker [mhi]
> kernel: Call Trace:
> kernel:  <TASK>
> kernel:  dump_stack_lvl+0x4e/0x70
> kernel:  warn_alloc+0x164/0x190
> kernel:  ? srso_return_thunk+0x5/0x5f
> kernel:  ? __alloc_pages_direct_compact+0xaf/0x360
> kernel:  __alloc_pages_slowpath.constprop.0+0xc75/0xd70
> kernel:  __alloc_pages_noprof+0x321/0x350
> kernel:  __dma_direct_alloc_pages.isra.0+0x14a/0x290
> kernel:  dma_direct_alloc+0x70/0x270
> kernel:  mhi_fw_load_handler+0x126/0x340 [mhi
> a96cb91daba500cc77f86bad60c1f332dc3babdf]
> kernel:  mhi_pm_st_worker+0x5e8/0xac0 [mhi
> a96cb91daba500cc77f86bad60c1f332dc3babdf]
> kernel:  ? srso_return_thunk+0x5/0x5f
> kernel:  process_one_work+0x17e/0x330
> kernel:  worker_thread+0x2ce/0x3f0
> kernel:  ? __pfx_worker_thread+0x10/0x10
> kernel:  kthread+0xd2/0x100
> kernel:  ? __pfx_kthread+0x10/0x10
> kernel:  ret_from_fork+0x34/0x50
> kernel:  ? __pfx_kthread+0x10/0x10
> kernel:  ret_from_fork_asm+0x1a/0x30
> kernel:  </TASK>
> kernel: Mem-Info:
> kernel: active_anon:513809 inactive_anon:152 isolated_anon:0
>      active_file:359315 inactive_file:2487001 isolated_file:0
>      unevictable:637 dirty:19 writeback:0
>      slab_reclaimable:160391 slab_unreclaimable:39729
>      mapped:175836 shmem:51039 pagetables:4415
>      sec_pagetables:0 bounce:0
>      kernel_misc_reclaimable:0
>      free:125666 free_pcp:0 free_cma:0
> 
> In above example, if we sum all the consumed memory, it comes out
> to be 15.5GB and free memory is ~ 500MB from a total of 16GB RAM.
> Even though memory is present. But all of the dma memory has been
> exhausted or fragmented.
> 
> Fix it by allocating it only once and then reuse the same allocated
> memory. As we'll allocate this memory only once, this memory will stay
> allocated.
> 
> Tested-on: WCN6855 WLAN.HSP.1.1-03926.13-QCAHSPSWPL_V2_SILICONZ_CE-2.52297.6
> 
> Fixes: cd457afb1667 ("bus: mhi: core: Add support for downloading firmware over BHIe")
> Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
> ---
> Reported here:
> https://lore.kernel.org/all/ead32f5b-730a-4b81-b38f-93d822f990c6@collabora.com
> 
> Still a lot of more fixes are required. Hence, I'm not adding closes tag.
> ---
>   drivers/bus/mhi/host/boot.c     | 19 ++++++++++---------
>   drivers/bus/mhi/host/init.c     |  5 +++++
>   drivers/bus/mhi/host/internal.h |  2 ++
>   include/linux/mhi.h             |  1 +
>   4 files changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
> index b3a85aa3c4768..11bb8c12ac597 100644
> --- a/drivers/bus/mhi/host/boot.c
> +++ b/drivers/bus/mhi/host/boot.c
> @@ -302,8 +302,8 @@ static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
>   	return -EIO;
>   }
>   
> -static void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
> -				struct image_info *image_info)
> +void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
> +			 struct image_info *image_info)
>   {
>   	struct mhi_buf *mhi_buf = image_info->mhi_buf;
>   
> @@ -455,18 +455,19 @@ static enum mhi_fw_load_type mhi_fw_load_type_get(const struct mhi_controller *m
>   
>   static int mhi_load_image_bhi(struct mhi_controller *mhi_cntrl, const u8 *fw_data, size_t size)
>   {
> -	struct image_info *image;
> +	struct image_info *image = mhi_cntrl->bhi_image;
>   	int ret;
>   
> -	ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
> -	if (ret)
> -		return ret;
> +	if (!image) {
> +		ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
> +		if (ret)
> +			return ret;
>   
> -	/* Load the firmware into BHI vec table */
> -	memcpy(image->mhi_buf->buf, fw_data, size);
> +		/* Load the firmware into BHI vec table */
> +		memcpy(image->mhi_buf->buf, fw_data, size);
> +	}
>   
>   	ret = mhi_fw_load_bhi(mhi_cntrl, &image->mhi_buf[image->entries - 1]);
If the MHI firmware load fails, don't we need to free the BHI buffer?

- Krishna Chaitanya.
> -	mhi_free_bhi_buffer(mhi_cntrl, image);
>   
>   	return ret;
>   }
> diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c
> index 6e06e4efec765..2e0f18c939e68 100644
> --- a/drivers/bus/mhi/host/init.c
> +++ b/drivers/bus/mhi/host/init.c
> @@ -1228,6 +1228,11 @@ void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl)
>   		mhi_cntrl->rddm_image = NULL;
>   	}
>   
> +	if (mhi_cntrl->bhi_image) {
> +		mhi_free_bhi_buffer(mhi_cntrl, mhi_cntrl->bhi_image);
> +		mhi_cntrl->bhi_image = NULL;
> +	}
> +
>   	mhi_deinit_dev_ctxt(mhi_cntrl);
>   }
>   EXPORT_SYMBOL_GPL(mhi_unprepare_after_power_down);
> diff --git a/drivers/bus/mhi/host/internal.h b/drivers/bus/mhi/host/internal.h
> index 1054e67bb450d..60b0699323375 100644
> --- a/drivers/bus/mhi/host/internal.h
> +++ b/drivers/bus/mhi/host/internal.h
> @@ -324,6 +324,8 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
>   			 struct image_info **image_info, size_t alloc_size);
>   void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
>   			 struct image_info *image_info);
> +void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
> +			 struct image_info *image_info);
>   
>   /* Power management APIs */
>   enum mhi_pm_state __must_check mhi_tryset_pm_state(
> diff --git a/include/linux/mhi.h b/include/linux/mhi.h
> index 4c567907933a5..593012f779d97 100644
> --- a/include/linux/mhi.h
> +++ b/include/linux/mhi.h
> @@ -391,6 +391,7 @@ struct mhi_controller {
>   	size_t reg_len;
>   	struct image_info *fbc_image;
>   	struct image_info *rddm_image;
> +	struct image_info *bhi_image;
>   	struct mhi_chan *mhi_chan;
>   	struct list_head lpm_chans;
>   	int *irq;
Re: [PATCH 1/3] bus: mhi: host: keep bhi buffer through suspend cycle
Posted by Muhammad Usama Anjum 2 months, 4 weeks ago
Thank you so much for review.

On 7/8/25 2:47 PM, Krishna Chaitanya Chundru wrote:
> 
> 
> On 6/30/2025 1:13 PM, Muhammad Usama Anjum wrote:
>> When there is memory pressure, at resume time dma_alloc_coherent()
>> returns error which in turn fails the loading of firmware and hence
>> the driver crashes:
>>
> why only bhi? bhie can also have same issue.
I thought I had handled all the BHIe cases in an earlier patch, but I
haven't. I'll post a fix for BHIe in the next version.

>> kernel: kworker/u33:5: page allocation failure: order:7,
>> mode:0xc04(GFP_NOIO|GFP_DMA32), nodemask=(null),cpuset=/,mems_allowed=0
>> kernel: CPU: 1 UID: 0 PID: 7693 Comm: kworker/u33:5 Not tainted
>> 6.11.11-valve17-1-neptune-611-g027868a0ac03 #1
>> 3843143b92e9da0fa2d3d5f21f51beaed15c7d59
>> kernel: Hardware name: Valve Galileo/Galileo, BIOS F7G0112 08/01/2024
>> kernel: Workqueue: mhi_hiprio_wq mhi_pm_st_worker [mhi]
>> kernel: Call Trace:
>> kernel:  <TASK>
>> kernel:  dump_stack_lvl+0x4e/0x70
>> kernel:  warn_alloc+0x164/0x190
>> kernel:  ? srso_return_thunk+0x5/0x5f
>> kernel:  ? __alloc_pages_direct_compact+0xaf/0x360
>> kernel:  __alloc_pages_slowpath.constprop.0+0xc75/0xd70
>> kernel:  __alloc_pages_noprof+0x321/0x350
>> kernel:  __dma_direct_alloc_pages.isra.0+0x14a/0x290
>> kernel:  dma_direct_alloc+0x70/0x270
>> kernel:  mhi_fw_load_handler+0x126/0x340 [mhi
>> a96cb91daba500cc77f86bad60c1f332dc3babdf]
>> kernel:  mhi_pm_st_worker+0x5e8/0xac0 [mhi
>> a96cb91daba500cc77f86bad60c1f332dc3babdf]
>> kernel:  ? srso_return_thunk+0x5/0x5f
>> kernel:  process_one_work+0x17e/0x330
>> kernel:  worker_thread+0x2ce/0x3f0
>> kernel:  ? __pfx_worker_thread+0x10/0x10
>> kernel:  kthread+0xd2/0x100
>> kernel:  ? __pfx_kthread+0x10/0x10
>> kernel:  ret_from_fork+0x34/0x50
>> kernel:  ? __pfx_kthread+0x10/0x10
>> kernel:  ret_from_fork_asm+0x1a/0x30
>> kernel:  </TASK>
>> kernel: Mem-Info:
>> kernel: active_anon:513809 inactive_anon:152 isolated_anon:0
>>      active_file:359315 inactive_file:2487001 isolated_file:0
>>      unevictable:637 dirty:19 writeback:0
>>      slab_reclaimable:160391 slab_unreclaimable:39729
>>      mapped:175836 shmem:51039 pagetables:4415
>>      sec_pagetables:0 bounce:0
>>      kernel_misc_reclaimable:0
>>      free:125666 free_pcp:0 free_cma:0
>>
>> In above example, if we sum all the consumed memory, it comes out
>> to be 15.5GB and free memory is ~ 500MB from a total of 16GB RAM.
>> Even though memory is present. But all of the dma memory has been
>> exhausted or fragmented.
>>
>> Fix it by allocating it only once and then reuse the same allocated
>> memory. As we'll allocate this memory only once, this memory will stay
>> allocated.
>>
>> Tested-on: WCN6855 WLAN.HSP.1.1-03926.13-QCAHSPSWPL_V2_SILICONZ_CE-2.52297.6
>>
>> Fixes: cd457afb1667 ("bus: mhi: core: Add support for downloading firmware over BHIe")
>> Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
>> ---
>> Reported here:
>> https://lore.kernel.org/all/ead32f5b-730a-4b81-b38f-93d822f990c6@collabora.com
>>
>> Still a lot of more fixes are required. Hence, I'm not adding closes tag.
>> ---
>>   drivers/bus/mhi/host/boot.c     | 19 ++++++++++---------
>>   drivers/bus/mhi/host/init.c     |  5 +++++
>>   drivers/bus/mhi/host/internal.h |  2 ++
>>   include/linux/mhi.h             |  1 +
>>   4 files changed, 18 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
>> index b3a85aa3c4768..11bb8c12ac597 100644
>> --- a/drivers/bus/mhi/host/boot.c
>> +++ b/drivers/bus/mhi/host/boot.c
>> @@ -302,8 +302,8 @@ static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
>>       return -EIO;
>>   }
>>   -static void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
>> -                struct image_info *image_info)
>> +void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
>> +             struct image_info *image_info)
>>   {
>>       struct mhi_buf *mhi_buf = image_info->mhi_buf;
>>   @@ -455,18 +455,19 @@ static enum mhi_fw_load_type mhi_fw_load_type_get(const struct mhi_controller *m
>>     static int mhi_load_image_bhi(struct mhi_controller *mhi_cntrl, const u8 *fw_data, size_t size)
>>   {
>> -    struct image_info *image;
>> +    struct image_info *image = mhi_cntrl->bhi_image;
>>       int ret;
>>   -    ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
>> -    if (ret)
>> -        return ret;
>> +    if (!image) {
>> +        ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
>> +        if (ret)
>> +            return ret;
>>   -    /* Load the firmware into BHI vec table */
>> -    memcpy(image->mhi_buf->buf, fw_data, size);
>> +        /* Load the firmware into BHI vec table */
>> +        memcpy(image->mhi_buf->buf, fw_data, size);
>> +    }
>>         ret = mhi_fw_load_bhi(mhi_cntrl, &image->mhi_buf[image->entries - 1]);
> if mhi fw load fails didn't we need to free bhi buffer.
Good point. I'll fix it in v2.

Re: [PATCH 1/3] bus: mhi: host: keep bhi buffer through suspend cycle
Posted by Greg Kroah-Hartman 3 months, 1 week ago
On Mon, Jun 30, 2025 at 12:43:28PM +0500, Muhammad Usama Anjum wrote:
> When there is memory pressure, at resume time dma_alloc_coherent()
> returns error which in turn fails the loading of firmware and hence
> the driver crashes:
> 
> kernel: kworker/u33:5: page allocation failure: order:7,
> mode:0xc04(GFP_NOIO|GFP_DMA32), nodemask=(null),cpuset=/,mems_allowed=0
> kernel: CPU: 1 UID: 0 PID: 7693 Comm: kworker/u33:5 Not tainted
> 6.11.11-valve17-1-neptune-611-g027868a0ac03 #1
> 3843143b92e9da0fa2d3d5f21f51beaed15c7d59

Please don't wrap kernel log lines.

> kernel: Hardware name: Valve Galileo/Galileo, BIOS F7G0112 08/01/2024
> kernel: Workqueue: mhi_hiprio_wq mhi_pm_st_worker [mhi]
> kernel: Call Trace:
> kernel:  <TASK>
> kernel:  dump_stack_lvl+0x4e/0x70
> kernel:  warn_alloc+0x164/0x190
> kernel:  ? srso_return_thunk+0x5/0x5f
> kernel:  ? __alloc_pages_direct_compact+0xaf/0x360
> kernel:  __alloc_pages_slowpath.constprop.0+0xc75/0xd70
> kernel:  __alloc_pages_noprof+0x321/0x350
> kernel:  __dma_direct_alloc_pages.isra.0+0x14a/0x290
> kernel:  dma_direct_alloc+0x70/0x270
> kernel:  mhi_fw_load_handler+0x126/0x340 [mhi
> a96cb91daba500cc77f86bad60c1f332dc3babdf]
> kernel:  mhi_pm_st_worker+0x5e8/0xac0 [mhi
> a96cb91daba500cc77f86bad60c1f332dc3babdf]

Same here.

> kernel:  ? srso_return_thunk+0x5/0x5f
> kernel:  process_one_work+0x17e/0x330
> kernel:  worker_thread+0x2ce/0x3f0
> kernel:  ? __pfx_worker_thread+0x10/0x10
> kernel:  kthread+0xd2/0x100
> kernel:  ? __pfx_kthread+0x10/0x10
> kernel:  ret_from_fork+0x34/0x50
> kernel:  ? __pfx_kthread+0x10/0x10
> kernel:  ret_from_fork_asm+0x1a/0x30
> kernel:  </TASK>
> kernel: Mem-Info:
> kernel: active_anon:513809 inactive_anon:152 isolated_anon:0
>     active_file:359315 inactive_file:2487001 isolated_file:0
>     unevictable:637 dirty:19 writeback:0
>     slab_reclaimable:160391 slab_unreclaimable:39729
>     mapped:175836 shmem:51039 pagetables:4415
>     sec_pagetables:0 bounce:0
>     kernel_misc_reclaimable:0
>     free:125666 free_pcp:0 free_cma:0
> 
> In above example, if we sum all the consumed memory, it comes out
> to be 15.5GB and free memory is ~ 500MB from a total of 16GB RAM.
> Even though memory is present. But all of the dma memory has been
> exhausted or fragmented.
> 
> Fix it by allocating it only once and then reuse the same allocated
> memory. As we'll allocate this memory only once, this memory will stay
> allocated.
> 
> Tested-on: WCN6855 WLAN.HSP.1.1-03926.13-QCAHSPSWPL_V2_SILICONZ_CE-2.52297.6
> 
> Fixes: cd457afb1667 ("bus: mhi: core: Add support for downloading firmware over BHIe")

No cc: stable?

> Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
> ---
> Reported here:
> https://lore.kernel.org/all/ead32f5b-730a-4b81-b38f-93d822f990c6@collabora.com
> 
> Still a lot of more fixes are required. Hence, I'm not adding closes tag.
> ---
>  drivers/bus/mhi/host/boot.c     | 19 ++++++++++---------
>  drivers/bus/mhi/host/init.c     |  5 +++++
>  drivers/bus/mhi/host/internal.h |  2 ++
>  include/linux/mhi.h             |  1 +
>  4 files changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
> index b3a85aa3c4768..11bb8c12ac597 100644
> --- a/drivers/bus/mhi/host/boot.c
> +++ b/drivers/bus/mhi/host/boot.c
> @@ -302,8 +302,8 @@ static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
>  	return -EIO;
>  }
>  
> -static void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
> -				struct image_info *image_info)
> +void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
> +			 struct image_info *image_info)
>  {
>  	struct mhi_buf *mhi_buf = image_info->mhi_buf;
>  
> @@ -455,18 +455,19 @@ static enum mhi_fw_load_type mhi_fw_load_type_get(const struct mhi_controller *m
>  
>  static int mhi_load_image_bhi(struct mhi_controller *mhi_cntrl, const u8 *fw_data, size_t size)
>  {
> -	struct image_info *image;
> +	struct image_info *image = mhi_cntrl->bhi_image;
>  	int ret;
>  
> -	ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
> -	if (ret)
> -		return ret;
> +	if (!image) {

What prevents image from going away right after you tested it?

thanks,

greg k-h
Re: [PATCH 1/3] bus: mhi: host: keep bhi buffer through suspend cycle
Posted by Muhammad Usama Anjum 3 months, 1 week ago
Thanks for reviewing!

On 7/1/25 3:25 PM, Greg Kroah-Hartman wrote:
> On Mon, Jun 30, 2025 at 12:43:28PM +0500, Muhammad Usama Anjum wrote:
>> When there is memory pressure, at resume time dma_alloc_coherent()
>> returns error which in turn fails the loading of firmware and hence
>> the driver crashes:
>>
>> kernel: kworker/u33:5: page allocation failure: order:7,
>> mode:0xc04(GFP_NOIO|GFP_DMA32), nodemask=(null),cpuset=/,mems_allowed=0
>> kernel: CPU: 1 UID: 0 PID: 7693 Comm: kworker/u33:5 Not tainted
>> 6.11.11-valve17-1-neptune-611-g027868a0ac03 #1
>> 3843143b92e9da0fa2d3d5f21f51beaed15c7d59
> 
> Please don't wrap kernel log lines.
Sorry, missed fixing it. I'll fix it.

> 
>> kernel: Hardware name: Valve Galileo/Galileo, BIOS F7G0112 08/01/2024
>> kernel: Workqueue: mhi_hiprio_wq mhi_pm_st_worker [mhi]
>> kernel: Call Trace:
>> kernel:  <TASK>
>> kernel:  dump_stack_lvl+0x4e/0x70
>> kernel:  warn_alloc+0x164/0x190
>> kernel:  ? srso_return_thunk+0x5/0x5f
>> kernel:  ? __alloc_pages_direct_compact+0xaf/0x360
>> kernel:  __alloc_pages_slowpath.constprop.0+0xc75/0xd70
>> kernel:  __alloc_pages_noprof+0x321/0x350
>> kernel:  __dma_direct_alloc_pages.isra.0+0x14a/0x290
>> kernel:  dma_direct_alloc+0x70/0x270
>> kernel:  mhi_fw_load_handler+0x126/0x340 [mhi
>> a96cb91daba500cc77f86bad60c1f332dc3babdf]
>> kernel:  mhi_pm_st_worker+0x5e8/0xac0 [mhi
>> a96cb91daba500cc77f86bad60c1f332dc3babdf]
> 
> Same here.
I'll fix it.

> 
>> kernel:  ? srso_return_thunk+0x5/0x5f
>> kernel:  process_one_work+0x17e/0x330
>> kernel:  worker_thread+0x2ce/0x3f0
>> kernel:  ? __pfx_worker_thread+0x10/0x10
>> kernel:  kthread+0xd2/0x100
>> kernel:  ? __pfx_kthread+0x10/0x10
>> kernel:  ret_from_fork+0x34/0x50
>> kernel:  ? __pfx_kthread+0x10/0x10
>> kernel:  ret_from_fork_asm+0x1a/0x30
>> kernel:  </TASK>
>> kernel: Mem-Info:
>> kernel: active_anon:513809 inactive_anon:152 isolated_anon:0
>>     active_file:359315 inactive_file:2487001 isolated_file:0
>>     unevictable:637 dirty:19 writeback:0
>>     slab_reclaimable:160391 slab_unreclaimable:39729
>>     mapped:175836 shmem:51039 pagetables:4415
>>     sec_pagetables:0 bounce:0
>>     kernel_misc_reclaimable:0
>>     free:125666 free_pcp:0 free_cma:0
>>
>> In above example, if we sum all the consumed memory, it comes out
>> to be 15.5GB and free memory is ~ 500MB from a total of 16GB RAM.
>> Even though memory is present. But all of the dma memory has been
>> exhausted or fragmented.
>>
>> Fix it by allocating it only once and then reuse the same allocated
>> memory. As we'll allocate this memory only once, this memory will stay
>> allocated.
>>
>> Tested-on: WCN6855 WLAN.HSP.1.1-03926.13-QCAHSPSWPL_V2_SILICONZ_CE-2.52297.6
>>
>> Fixes: cd457afb1667 ("bus: mhi: core: Add support for downloading firmware over BHIe")
> 
> No cc: stable?
I'll add stable in v2.

> 
>> Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
>> ---
>> Reported here:
>> https://lore.kernel.org/all/ead32f5b-730a-4b81-b38f-93d822f990c6@collabora.com
>>
>> Still a lot of more fixes are required. Hence, I'm not adding closes tag.
>> ---
>>  drivers/bus/mhi/host/boot.c     | 19 ++++++++++---------
>>  drivers/bus/mhi/host/init.c     |  5 +++++
>>  drivers/bus/mhi/host/internal.h |  2 ++
>>  include/linux/mhi.h             |  1 +
>>  4 files changed, 18 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
>> index b3a85aa3c4768..11bb8c12ac597 100644
>> --- a/drivers/bus/mhi/host/boot.c
>> +++ b/drivers/bus/mhi/host/boot.c
>> @@ -302,8 +302,8 @@ static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
>>  	return -EIO;
>>  }
>>  
>> -static void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
>> -				struct image_info *image_info)
>> +void mhi_free_bhi_buffer(struct mhi_controller *mhi_cntrl,
>> +			 struct image_info *image_info)
>>  {
>>  	struct mhi_buf *mhi_buf = image_info->mhi_buf;
>>  
>> @@ -455,18 +455,19 @@ static enum mhi_fw_load_type mhi_fw_load_type_get(const struct mhi_controller *m
>>  
>>  static int mhi_load_image_bhi(struct mhi_controller *mhi_cntrl, const u8 *fw_data, size_t size)
>>  {
>> -	struct image_info *image;
>> +	struct image_info *image = mhi_cntrl->bhi_image;
>>  	int ret;
>>  
>> -	ret = mhi_alloc_bhi_buffer(mhi_cntrl, &image, size);
>> -	if (ret)
>> -		return ret;
>> +	if (!image) {
> 
> What prevents image from going away right after you tested it?
The changed code isn't doing what it should. I'll fix it completely in v2.

> 
> thanks,
> 
> greg k-h