From: Swarna Prabhu <sw.prabhu6@gmail.com>
The WRITE SAME(16) and WRITE SAME(10) SCSI commands use
a page from a dedicated mempool ('sd_page_pool') for their
payload. This pool was initialized to allocate single
pages, which was sufficient as long as the device sector
size did not exceed PAGE_SIZE.
Given that the block layer now supports block sizes up to
64K, i.e. beyond PAGE_SIZE, adapt sd_set_special_bvec()
to accommodate that.
With the above fix, enable sector sizes > PAGE_SIZE in
the SCSI sd driver.
Cc: stable@vger.kernel.org
Signed-off-by: Swarna Prabhu <s.prabhu@samsung.com>
Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
Note: We are allocating pages of order aligned to
BLK_MAX_BLOCK_SIZE for the mempool page allocator
'sd_page_pool' all the time. This is because we only
know that a bigger sector size device is attached at
sd_probe and it might be too late to reallocate mempool
with order >0.
drivers/scsi/sd.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0252d3f6bed1..17b5c1589eb2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -892,14 +892,24 @@ static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
(logical_block_size >> SECTOR_SHIFT);
}
-static void *sd_set_special_bvec(struct request *rq, unsigned int data_len)
+static void *sd_set_special_bvec(struct scsi_cmnd *cmd, unsigned int data_len)
{
struct page *page;
+ struct request *rq = scsi_cmd_to_rq(cmd);
+ struct scsi_device *sdp = cmd->device;
+ unsigned sector_size = sdp->sector_size;
+ unsigned int nr_pages = DIV_ROUND_UP(sector_size, PAGE_SIZE);
+ int n = 0;
page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!page)
return NULL;
- clear_highpage(page);
+
+ do {
+ clear_highpage(page + n);
+ n++;
+ } while (n < nr_pages);
+
bvec_set_page(&rq->special_vec, page, data_len, 0);
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
return bvec_virt(&rq->special_vec);
@@ -915,7 +925,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
unsigned int data_len = 24;
char *buf;
- buf = sd_set_special_bvec(rq, data_len);
+ buf = sd_set_special_bvec(cmd, data_len);
if (!buf)
return BLK_STS_RESOURCE;
@@ -1004,7 +1014,7 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
u32 data_len = sdp->sector_size;
- if (!sd_set_special_bvec(rq, data_len))
+ if (!sd_set_special_bvec(cmd, data_len))
return BLK_STS_RESOURCE;
cmd->cmd_len = 16;
@@ -1031,7 +1041,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
u32 data_len = sdp->sector_size;
- if (!sd_set_special_bvec(rq, data_len))
+ if (!sd_set_special_bvec(cmd, data_len))
return BLK_STS_RESOURCE;
cmd->cmd_len = 10;
@@ -2880,10 +2890,7 @@ sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim,
"assuming 512.\n");
}
- if (sector_size != 512 &&
- sector_size != 1024 &&
- sector_size != 2048 &&
- sector_size != 4096) {
+ if (blk_validate_block_size(sector_size)) {
sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n",
sector_size);
/*
@@ -4368,7 +4375,7 @@ static int __init init_sd(void)
if (err)
goto err_out;
- sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0);
+ sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, get_order(BLK_MAX_BLOCK_SIZE));
if (!sd_page_pool) {
printk(KERN_ERR "sd: can't init discard page pool\n");
err = -ENOMEM;
--
2.51.0
On 2025/12/09 17:41, sw.prabhu6@gmail.com wrote:
> From: Swarna Prabhu <sw.prabhu6@gmail.com>
>
> The WRITE SAME(16) and WRITE SAME(10) scsi commands uses
> a page from a dedicated mempool('sd_page_pool') for its
> payload. This pool was initialized to allocate single
> pages, which was sufficient as long as the device sector
> size did not exceed the PAGE_SIZE.
>
> Given that block layer now supports block size upto
> 64K ie beyond PAGE_SIZE, adapt sd_set_special_bvec()
> to accommodate that.
>
> With the above fix, enable sector sizes > PAGE_SIZE in
> scsi sd driver.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Swarna Prabhu <s.prabhu@samsung.com>
> Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> ---
> Note: We are allocating pages of order aligned to
> BLK_MAX_BLOCK_SIZE for the mempool page allocator
> 'sd_page_pool' all the time. This is because we only
> know that a bigger sector size device is attached at
> sd_probe and it might be too late to reallocate mempool
> with order >0.
That is a lot heavier on the memory for the vast majority of devices which are
512B or 4K block size... It may be better to have the special "large block"
mempool attached to the scsi disk struct and keep the default single page
mempool for all other regular devices.
>
> drivers/scsi/sd.c | 27 +++++++++++++++++----------
> 1 file changed, 17 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 0252d3f6bed1..17b5c1589eb2 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -892,14 +892,24 @@ static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
> (logical_block_size >> SECTOR_SHIFT);
> }
>
> -static void *sd_set_special_bvec(struct request *rq, unsigned int data_len)
> +static void *sd_set_special_bvec(struct scsi_cmnd *cmd, unsigned int data_len)
> {
> struct page *page;
> + struct request *rq = scsi_cmd_to_rq(cmd);
> + struct scsi_device *sdp = cmd->device;
> + unsigned sector_size = sdp->sector_size;
> + unsigned int nr_pages = DIV_ROUND_UP(sector_size, PAGE_SIZE);
> + int n = 0;
>
> page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
> if (!page)
> return NULL;
> - clear_highpage(page);
> +
> + do {
> + clear_highpage(page + n);
> + n++;
> + } while (n < nr_pages);
> +
> bvec_set_page(&rq->special_vec, page, data_len, 0);
> rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
> return bvec_virt(&rq->special_vec);
> @@ -915,7 +925,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
> unsigned int data_len = 24;
> char *buf;
>
> - buf = sd_set_special_bvec(rq, data_len);
> + buf = sd_set_special_bvec(cmd, data_len);
> if (!buf)
> return BLK_STS_RESOURCE;
>
> @@ -1004,7 +1014,7 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
> u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
> u32 data_len = sdp->sector_size;
>
> - if (!sd_set_special_bvec(rq, data_len))
> + if (!sd_set_special_bvec(cmd, data_len))
> return BLK_STS_RESOURCE;
>
> cmd->cmd_len = 16;
> @@ -1031,7 +1041,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
> u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
> u32 data_len = sdp->sector_size;
>
> - if (!sd_set_special_bvec(rq, data_len))
> + if (!sd_set_special_bvec(cmd, data_len))
> return BLK_STS_RESOURCE;
>
> cmd->cmd_len = 10;
> @@ -2880,10 +2890,7 @@ sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim,
> "assuming 512.\n");
> }
>
> - if (sector_size != 512 &&
> - sector_size != 1024 &&
> - sector_size != 2048 &&
> - sector_size != 4096) {
> + if (blk_validate_block_size(sector_size)) {
> sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n",
> sector_size);
> /*
> @@ -4368,7 +4375,7 @@ static int __init init_sd(void)
> if (err)
> goto err_out;
>
> - sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0);
> + sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, get_order(BLK_MAX_BLOCK_SIZE));
> if (!sd_page_pool) {
> printk(KERN_ERR "sd: can't init discard page pool\n");
> err = -ENOMEM;
--
Damien Le Moal
Western Digital Research
On 12/10/25 07:22, Damien Le Moal wrote:
> On 2025/12/09 17:41, sw.prabhu6@gmail.com wrote:
>> From: Swarna Prabhu <sw.prabhu6@gmail.com>
>>
>> The WRITE SAME(16) and WRITE SAME(10) scsi commands uses
>> a page from a dedicated mempool('sd_page_pool') for its
>> payload. This pool was initialized to allocate single
>> pages, which was sufficient as long as the device sector
>> size did not exceed the PAGE_SIZE.
>>
>> Given that block layer now supports block size upto
>> 64K ie beyond PAGE_SIZE, adapt sd_set_special_bvec()
>> to accommodate that.
>>
>> With the above fix, enable sector sizes > PAGE_SIZE in
>> scsi sd driver.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Swarna Prabhu <s.prabhu@samsung.com>
>> Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
>> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
>> ---
>> Note: We are allocating pages of order aligned to
>> BLK_MAX_BLOCK_SIZE for the mempool page allocator
>> 'sd_page_pool' all the time. This is because we only
>> know that a bigger sector size device is attached at
>> sd_probe and it might be too late to reallocate mempool
>> with order >0.
>
> That is a lot heavier on the memory for the vast majority of devices which are
> 512B or 4K block size... It may be better to have the special "large block"
> mempool attached to the scsi disk struct and keep the default single page
> mempool for all other regular devices.
>
We had the same feeling as well and we mentioned it in the 1st RFC.
But when will you initialize the mempool for the large block devices? I don't think it
makes sense to unconditionally initialize it in init_sd.
Do we do it during the sd_probe() when we first encounter a large block device? That way
we may not waste any memory if no large block devices are attached.
--
Pankaj
On 2025/12/12 8:53, Pankaj Raghav wrote:
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Swarna Prabhu <s.prabhu@samsung.com>
>>> Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
>>> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
>>> ---
>>> Note: We are allocating pages of order aligned to
>>> BLK_MAX_BLOCK_SIZE for the mempool page allocator
>>> 'sd_page_pool' all the time. This is because we only
>>> know that a bigger sector size device is attached at
>>> sd_probe and it might be too late to reallocate mempool
>>> with order >0.
>>
>> That is a lot heavier on the memory for the vast majority of devices which
>> are 512B or 4K block size... It may be better to have the special "large
>> block" mempool attached to the scsi disk struct and keep the default
>> single page mempool for all other regular devices.
>>
>
> We had the same feeling as well and we mentioned it in the 1st RFC.
>
> But when will you initialize the mempool for the large block devices? I
> don't think it makes sense to unconditionally initialize it in init_sd. Do
> we do it during the sd_probe() when we first encounter a large block device?
> That way we may not waste any memory if no large block devices are attached.
That sounds reasonable to me. Any system that has a device with a large sector
size will get this mempool initialized when the first such device is scanned,
and systems with regular disks (the vast majority of cases for scsi) will not.
You may want to be careful with that initialization in sd_probe() though: scsi
device scan is asynchronous and done in parallel for multiple devices, so you
will need some atomicity for checking the mempool existence and initializing it
if needed.
--
Damien Le Moal
Western Digital Research
© 2016 - 2025 Red Hat, Inc.