linux-next: manual merge of the drm tree with the origin tree

Mark Brown posted 1 patch 6 days, 15 hours ago
linux-next: manual merge of the drm tree with the origin tree
Posted by Mark Brown 6 days, 15 hours ago
Hi all,

Today's linux-next merge of the drm tree got a conflict in:

  drivers/gpu/drm/xe/xe_migrate.c

between commit:

  4568dfbb8363a ("drm/xe: Make decision to use Xe2-style blitter instructions a feature flag")

from the origin tree and commits:

  09b3998429075 ("drm/xe: Make decision to use Xe2-style blitter instructions a feature flag")
  388768fe17386 ("drm/xe: Refactor emit_clear_main_copy")
  65be72013baf9 ("drm/xe: Refactor emit_clear_link_copy")

from the drm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

diff --combined drivers/gpu/drm/xe/xe_migrate.c
index a22413f892a09,9428dd5e7760c..0000000000000
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@@ -31,6 -31,7 +31,7 @@@
  #include "xe_map.h"
  #include "xe_mem_pool.h"
  #include "xe_mocs.h"
+ #include "xe_pat.h"
  #include "xe_printk.h"
  #include "xe_pt.h"
  #include "xe_res_cursor.h"
@@@ -217,7 -218,7 +218,7 @@@ static void xe_migrate_prepare_vm(struc
  				  struct xe_vm *vm, u32 *ofs)
  {
  	struct xe_device *xe = tile_to_xe(tile);
- 	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+ 	u16 pat_index = xe_cache_pat_idx(xe, XE_CACHE_WB);
  	u8 id = tile->id;
  	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
  #define VRAM_IDENTITY_MAP_COUNT	2
@@@ -337,7 -338,7 +338,7 @@@
  		 * if flat ccs is enabled.
  		 */
  		if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) {
- 			u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION];
+ 			u16 comp_pat_index = xe_cache_pat_idx(xe, XE_CACHE_NONE_COMPRESSION);
  			u64 vram_offset = IDENTITY_OFFSET +
  				DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G);
  			u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE;
@@@ -637,10 -638,10 +638,10 @@@ static void emit_pte(struct xe_migrate 
  
  	/* Indirect access needs compression enabled uncached PAT index */
  	if (GRAPHICS_VERx100(xe) >= 2000)
- 		pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] :
- 					  xe->pat.idx[XE_CACHE_WB];
+ 		pat_index = is_comp_pte ? xe_cache_pat_idx(xe, XE_CACHE_NONE_COMPRESSION) :
+ 					  xe_cache_pat_idx(xe, XE_CACHE_WB);
  	else
- 		pat_index = xe->pat.idx[XE_CACHE_WB];
+ 		pat_index = xe_cache_pat_idx(xe, XE_CACHE_WB);
  
  	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
  
@@@ -727,7 -728,22 +728,22 @@@ static void emit_copy_ccs(struct xe_gt 
  	bb->len = cs - bb->cs;
  }
  
- #define EMIT_COPY_DW 10
+ static u32 blt_fast_copy_cmd_len(struct xe_device *xe)
+ {
+ 	return 10;
+ }
+ 
+ static u32 blt_mem_copy_cmd_len(struct xe_device *xe)
+ {
+ 	return 10;
+ }
+ 
+ static u32 emit_copy_cmd_len(struct xe_device *xe)
+ {
+ 	return (xe->info.has_mem_copy_instr) ? blt_mem_copy_cmd_len(xe) :
+ 		  blt_fast_copy_cmd_len(xe);
+ }
+ 
  static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
  			      u64 dst_ofs, unsigned int size,
  			      unsigned int pitch)
@@@ -735,6 -751,7 +751,7 @@@
  	struct xe_device *xe = gt_to_xe(gt);
  	u32 mocs = 0;
  	u32 tile_y = 0;
+ 	u32 len;
  
  	xe_gt_assert(gt, !(pitch & 3));
  	xe_gt_assert(gt, size / pitch <= S16_MAX);
@@@ -747,7 -764,8 +764,8 @@@
  	if (GRAPHICS_VERx100(xe) >= 1250)
  		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
  
- 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+ 	len = blt_fast_copy_cmd_len(xe);
+ 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (len - 2);
  	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
  	bb->cs[bb->len++] = 0;
  	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
@@@ -764,6 -782,7 +782,7 @@@ static void emit_mem_copy(struct xe_gt 
  			  u64 dst_ofs, unsigned int size, unsigned int pitch)
  {
  	u32 mode, copy_type, width;
+ 	u32 len;
  
  	xe_gt_assert(gt, IS_ALIGNED(size, pitch));
  	xe_gt_assert(gt, pitch <= U16_MAX);
@@@ -789,7 -808,9 +808,9 @@@
  
  	xe_gt_assert(gt, width <= U16_MAX);
  
- 	bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type;
+ 	len = blt_mem_copy_cmd_len(gt_to_xe(gt));
+ 
+ 	bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type | (len - 2);
  	bb->cs[bb->len++] = width - 1;
  	bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */
  	bb->cs[bb->len++] = pitch - 1;
@@@ -966,7 -987,7 +987,7 @@@ static struct dma_fence *__xe_migrate_c
  		}
  
  		/* Add copy commands size here */
- 		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+ 		batch_size += ((copy_only_ccs) ? 0 : emit_copy_cmd_len(xe)) +
  			((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0));
  
  		bb = xe_bb_new(gt, batch_size, usm);
@@@ -1405,7 -1426,7 +1426,7 @@@ struct dma_fence *xe_migrate_vram_copy_
  
  		batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs,
  					      &sysmem_L0_pt, 0, avail_pts, avail_pts);
- 		batch_size += EMIT_COPY_DW;
+ 		batch_size += emit_copy_cmd_len(xe);
  
  		bb = xe_bb_new(gt, batch_size, usm);
  		if (IS_ERR(bb)) {
@@@ -1460,12 -1481,17 +1481,17 @@@
  	return fence;
  }
  
+ static u32 blt_mem_set_cmd_len(struct xe_device *xe)
+ {
+ 	return 7;
+ }
+ 
  static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
  				 u32 size, u32 pitch)
  {
  	struct xe_device *xe = gt_to_xe(gt);
  	u32 *cs = bb->cs + bb->len;
- 	u32 len = PVC_MEM_SET_CMD_LEN_DW;
+ 	u32 len = blt_mem_set_cmd_len(xe);
  
  	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
  	*cs++ = pitch - 1;
@@@ -1483,15 -1509,21 +1509,21 @@@
  	bb->len += len;
  }
  
+ static u32 blt_fast_color_cmd_len(struct xe_device *xe)
+ {
+ 	if (GRAPHICS_VERx100(xe) >= 1250)
+ 		return 16;
+ 	else
+ 		return 11;
+ }
+ 
  static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
  				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
  {
  	struct xe_device *xe = gt_to_xe(gt);
  	u32 *cs = bb->cs + bb->len;
- 	u32 len = XY_FAST_COLOR_BLT_DW;
+ 	u32 len = blt_fast_color_cmd_len(xe);
  
- 	if (GRAPHICS_VERx100(xe) < 1250)
- 		len = 11;
  
  	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
  		(len - 2);
@@@ -1526,10 -1558,12 +1558,12 @@@
  
  static u32 emit_clear_cmd_len(struct xe_gt *gt)
  {
+ 	struct xe_device *xe = gt_to_xe(gt);
+ 
  	if (gt->info.has_xe2_blt_instructions)
- 		return PVC_MEM_SET_CMD_LEN_DW;
+ 		return blt_mem_set_cmd_len(xe);
  	else
- 		return XY_FAST_COLOR_BLT_DW;
+ 		return blt_fast_color_cmd_len(xe);
  }
  
  static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
@@@ -1862,7 -1896,7 +1896,7 @@@ __xe_migrate_update_pgtables(struct xe_
  
  	/* For sysmem PTE's, need to map them in our hole.. */
  	if (!IS_DGFX(xe)) {
- 		u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+ 		u16 pat_index = xe_cache_pat_idx(xe, XE_CACHE_WB);
  		u32 ptes, ofs;
  
  		ppgtt_ofs = NUM_KERNEL_PDE - 1;
@@@ -2084,7 -2118,7 +2118,7 @@@ static void build_pt_update_batch_sram(
  				       struct drm_pagemap_addr *sram_addr,
  				       u32 size, int level)
  {
- 	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+ 	u16 pat_index = xe_cache_pat_idx(tile_to_xe(m->tile), XE_CACHE_WB);
  	u64 gpu_page_size = 0x1ull << xe_pt_shift(level);
  	u32 ptes;
  	int i = 0;
@@@ -2202,7 -2236,7 +2236,7 @@@ static struct dma_fence *xe_migrate_vra
  	xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);
  
  	batch_size += pte_update_cmd_size(npages << PAGE_SHIFT);
- 	batch_size += EMIT_COPY_DW;
+ 	batch_size += emit_copy_cmd_len(xe);
  
  	bb = xe_bb_new(gt, batch_size, use_usm_batch);
  	if (IS_ERR(bb)) {