[v2] swiotlb: allocate padding slots if necessary

[PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

Posted by Petr Tesarik 1 year, 10 months ago

From: Petr Tesarik <petr.tesarik1@huawei-partners.com>

Allow a buffer pre-padding of up to alloc_align_mask. If the allocation
alignment is bigger than IO_TLB_SIZE and min_align_mask covers any non-zero
bits in the original address between IO_TLB_SIZE and alloc_align_mask,
these bits are not preserved in the swiotlb buffer address.

To fix this case, increase the allocation size and use a larger offset
within the allocated buffer. As a result, extra padding slots may be
allocated before the mapping start address.

Set the orig_addr in these padding slots to INVALID_PHYS_ADDR, because they
do not correspond to any CPU buffer and the data must never be synced.

The padding slots should be automatically released when the buffer is
unmapped. However, swiotlb_tbl_unmap_single() takes only the address of the
DMA buffer slot, not the first padding slot. Save the number of padding
slots in struct io_tlb_slot and use it to adjust the slot index in
swiotlb_release_slots(), so all allocated slots are properly freed.

Fixes: 2fd4fa5d3fb5 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present")
Link: https://lore.kernel.org/linux-iommu/20240311210507.217daf8b@meshulam.tesarici.cz/
Signed-off-by: Petr Tesarik <petr.tesarik1@huawei-partners.com>
---
 kernel/dma/swiotlb.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 86fe172b5958..aefb05ff55e7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,11 +69,14 @@
  * @alloc_size:	Size of the allocated buffer.
  * @list:	The free list describing the number of free entries available
  *		from each index.
+ * @pad_slots:	Number of preceding padding slots. Valid only in the first
+ *		allocated non-padding slot.
  */
 struct io_tlb_slot {
 	phys_addr_t orig_addr;
 	size_t alloc_size;
-	unsigned int list;
+	unsigned short list;
+	unsigned short pad_slots;
 };
 
 static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
 					 mem->nslabs - i);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
+		mem->slots[i].pad_slots = 0;
 	}
 
 	memset(vaddr, 0, bytes);
@@ -1328,11 +1332,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		unsigned long attrs)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	unsigned int offset;
 	struct io_tlb_pool *pool;
 	unsigned int i;
 	int index;
 	phys_addr_t tlb_addr;
+	unsigned short pad_slots;
 
 	if (!mem || !mem->nslabs) {
 		dev_warn_ratelimited(dev,
@@ -1349,6 +1354,15 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
+	/*
+	 * Calculate buffer pre-padding within the allocated space. Use it to
+	 * preserve the low bits of the original address according to device's
+	 * min_align_mask. Limit the padding to alloc_align_mask or slot size
+	 * (whichever is bigger); higher bits of the original address are
+	 * preserved by selecting a suitable IO TLB slot.
+	 */
+	offset = orig_addr & dma_get_min_align_mask(dev) &
+		(alloc_align_mask | (IO_TLB_SIZE - 1));
 	index = swiotlb_find_slots(dev, orig_addr,
 				   alloc_size + offset, alloc_align_mask, &pool);
 	if (index == -1) {
@@ -1364,6 +1378,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	 * This is needed when we sync the memory.  Then we sync the buffer if
 	 * needed.
 	 */
+	pad_slots = offset / IO_TLB_SIZE;
+	offset %= IO_TLB_SIZE;
+	index += pad_slots;
+	pool->slots[index].pad_slots = i;
 	for (i = 0; i < nr_slots(alloc_size + offset); i++)
 		pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
 	tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1385,12 +1403,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
 	unsigned long flags;
 	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
-	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-	int aindex = index / mem->area_nslabs;
-	struct io_tlb_area *area = &mem->areas[aindex];
+	int index, nslots, aindex;
+	struct io_tlb_area *area;
 	int count, i;
 
+	index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+	index -= mem->slots[index].pad_slots;
+	nslots = nr_slots(mem->slots[index].alloc_size + offset);
+	aindex = index / mem->area_nslabs;
+	area = &mem->areas[aindex];
+
 	/*
 	 * Return the buffer to the free list by setting the corresponding
 	 * entries to indicate the number of contiguous entries available.
@@ -1413,6 +1435,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 		mem->slots[i].list = ++count;
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
+		mem->slots[i].pad_slots = 0;
 	}
 
 	/*
-- 
2.34.1

Re: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary

Posted by kernel test robot 1 year, 10 months ago

Hi Petr,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.8]
[also build test WARNING on linus/master next-20240318]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Petr-Tesarik/swiotlb-extend-buffer-pre-padding-to-alloc_align_mask-if-necessary/20240318-212500
base:   v6.8
patch link:    https://lore.kernel.org/r/20240318130447.594-2-petrtesarik%40huaweicloud.com
patch subject: [PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20240319/202403191203.AtV7fvue-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 8f68022f8e6e54d1aeae4ed301f5a015963089b7)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240319/202403191203.AtV7fvue-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403191203.AtV7fvue-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from kernel/dma/swiotlb.c:27:
   In file included from include/linux/dma-direct.h:9:
   In file included from include/linux/dma-mapping.h:8:
   In file included from include/linux/device.h:32:
   In file included from include/linux/device/driver.h:21:
   In file included from include/linux/module.h:19:
   In file included from include/linux/elf.h:6:
   In file included from arch/s390/include/asm/elf.h:173:
   In file included from arch/s390/include/asm/mmu_context.h:11:
   In file included from arch/s390/include/asm/pgalloc.h:18:
   In file included from include/linux/mm.h:2188:
   include/linux/vmstat.h:522:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     522 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/dma/swiotlb.c:27:
   In file included from include/linux/dma-direct.h:9:
   In file included from include/linux/dma-mapping.h:11:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:78:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
         |                                                           ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
     102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
         |                                                      ^
   In file included from kernel/dma/swiotlb.c:27:
   In file included from include/linux/dma-direct.h:9:
   In file included from include/linux/dma-mapping.h:11:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:78:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
         |                                                           ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
     115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
         |                                                      ^
   In file included from kernel/dma/swiotlb.c:27:
   In file included from include/linux/dma-direct.h:9:
   In file included from include/linux/dma-mapping.h:11:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:78:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     692 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     700 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     708 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     717 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     726 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     735 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
>> kernel/dma/swiotlb.c:1348:33: warning: variable 'i' is uninitialized when used here [-Wuninitialized]
    1348 |         pool->slots[index].pad_slots = i;
         |                                        ^
   kernel/dma/swiotlb.c:1301:16: note: initialize the variable 'i' to silence this warning
    1301 |         unsigned int i;
         |                       ^
         |                        = 0
   kernel/dma/swiotlb.c:1643:20: warning: unused function 'swiotlb_create_debugfs_files' [-Wunused-function]
    1643 | static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
         |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   15 warnings generated.


vim +/i +1348 kernel/dma/swiotlb.c

  1292	
  1293	phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
  1294			size_t mapping_size, size_t alloc_size,
  1295			unsigned int alloc_align_mask, enum dma_data_direction dir,
  1296			unsigned long attrs)
  1297	{
  1298		struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
  1299		unsigned int offset;
  1300		struct io_tlb_pool *pool;
  1301		unsigned int i;
  1302		int index;
  1303		phys_addr_t tlb_addr;
  1304		unsigned short pad_slots;
  1305	
  1306		if (!mem || !mem->nslabs) {
  1307			dev_warn_ratelimited(dev,
  1308				"Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
  1309			return (phys_addr_t)DMA_MAPPING_ERROR;
  1310		}
  1311	
  1312		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
  1313			pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
  1314	
  1315		if (mapping_size > alloc_size) {
  1316			dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
  1317				      mapping_size, alloc_size);
  1318			return (phys_addr_t)DMA_MAPPING_ERROR;
  1319		}
  1320	
  1321		/*
  1322		 * Calculate buffer pre-padding within the allocated space. Use it to
  1323		 * preserve the low bits of the original address according to device's
  1324		 * min_align_mask. Limit the padding to alloc_align_mask or slot size
  1325		 * (whichever is bigger); higher bits of the original address are
  1326		 * preserved by selecting a suitable IO TLB slot.
  1327		 */
  1328		offset = orig_addr & dma_get_min_align_mask(dev) &
  1329			(alloc_align_mask | (IO_TLB_SIZE - 1));
  1330		index = swiotlb_find_slots(dev, orig_addr,
  1331					   alloc_size + offset, alloc_align_mask, &pool);
  1332		if (index == -1) {
  1333			if (!(attrs & DMA_ATTR_NO_WARN))
  1334				dev_warn_ratelimited(dev,
  1335		"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
  1336					 alloc_size, mem->nslabs, mem_used(mem));
  1337			return (phys_addr_t)DMA_MAPPING_ERROR;
  1338		}
  1339	
  1340		/*
  1341		 * Save away the mapping from the original address to the DMA address.
  1342		 * This is needed when we sync the memory.  Then we sync the buffer if
  1343		 * needed.
  1344		 */
  1345		pad_slots = offset / IO_TLB_SIZE;
  1346		offset %= IO_TLB_SIZE;
  1347		index += pad_slots;
> 1348		pool->slots[index].pad_slots = i;
  1349		for (i = 0; i < nr_slots(alloc_size + offset); i++)
  1350			pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
  1351		tlb_addr = slot_addr(pool->start, index) + offset;
  1352		/*
  1353		 * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy
  1354		 * the original buffer to the TLB buffer before initiating DMA in order
  1355		 * to preserve the original's data if the device does a partial write,
  1356		 * i.e. if the device doesn't overwrite the entire buffer.  Preserving
  1357		 * the original data, even if it's garbage, is necessary to match
  1358		 * hardware behavior.  Use of swiotlb is supposed to be transparent,
  1359		 * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
  1360		 */
  1361		swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
  1362		return tlb_addr;
  1363	}
  1364	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

[PATCH v2 1/2] swiotlb: extend buffer pre-padding to alloc_align_mask if necessary
[PATCH v2 2/2] bug: introduce ASSERT_VAR_CAN_HOLD()