diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov index 7f5ef9eada53..1d6eaff6882f 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov @@ -129,6 +129,37 @@ Description: -EIO if FW refuses to change the provisioning. +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/vram_quota +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf/profile/vram_quota +Date: February 2026 +KernelVersion: 7.0 +Contact: intel-xe@lists.freedesktop.org +Description: + These files allow to perform initial VFs VRAM provisioning prior to VFs + enabling or to change VFs VRAM provisioning once the VFs are enabled. + Any non-zero initial VRAM provisioning will block VFs auto-provisioning. + Without initial VRAM provisioning those files will show result of the + VRAM auto-provisioning performed by the PF once the VFs are enabled. + Once the VFs are disabled, all VRAM provisioning will be released. + These files are visible only on discrete Intel Xe platforms with VRAM + and are writeable only if dynamic VFs VRAM provisioning is supported. + + .bulk_profile/vram_quota: (WO) unsigned integer + The amount of the provisioned VRAM in [bytes] for each VF. + Actual quota value might be aligned per HW/FW requirements. + + profile/vram_quota: (RW) unsigned integer + The amount of the provisioned VRAM in [bytes] for this VF. + Actual quota value might be aligned per HW/FW requirements. + + Default is 0 (unprovisioned). + + Writes to these attributes may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on given HW/FW, + -EIO if FW refuses to change the provisioning. 
+ + What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf/stop Date: October 2025 KernelVersion: 6.19 diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst index 5d23e9f27391..9c15a300bc62 100644 --- a/Documentation/gpu/xe/xe_firmware.rst +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -31,6 +31,9 @@ GuC Power Conservation (PC) .. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c :doc: GuC Power Conservation (PC) +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_rc.c + :doc: GuC Render C-states (GuC RC) + PCIe Gen5 Limitations ===================== diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 9ef9e52c0547..35dd07297dd0 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -819,7 +819,7 @@ retry: if (!(pfns[i] & HMM_PFN_VALID)) { state = DRM_GPUSVM_SCAN_UNPOPULATED; - goto err_free; + break; } page = hmm_pfn_to_page(pfns[i]); @@ -856,9 +856,9 @@ retry: i += 1ul << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); } -err_free: drm_gpusvm_notifier_unlock(range->gpusvm); +err_free: kvfree(pfns); return state; } @@ -1495,7 +1495,7 @@ map_pages: } zdd = page->zone_device_data; if (pagemap != page_pgmap(page)) { - if (i > 0) { + if (pagemap) { err = -EOPNOTSUPP; goto err_unmap; } @@ -1572,6 +1572,7 @@ set_seqno: return 0; err_unmap: + svm_pages->flags.has_dma_mapping = true; __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); drm_gpusvm_notifier_unlock(gpusvm); err_free: diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index bdc79140875c..862675ac5bb2 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -480,18 +480,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, .start = start, .end = end, .pgmap_owner = pagemap->owner, - /* - * FIXME: MIGRATE_VMA_SELECT_DEVICE_PRIVATE intermittently - * causes 'xe_exec_system_allocator --r *race*no*' to trigger aa - * engine reset and a hard hang due to getting 
stuck on a folio - * lock. This should work and needs to be root-caused. The only - * downside of not selecting MIGRATE_VMA_SELECT_DEVICE_PRIVATE - * is that device-to-device migrations won’t work; instead, - * memory will bounce through system memory. This path should be - * rare and only occur when the madvise attributes of memory are - * changed or atomics are being used. - */ - .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT, + .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT | + MIGRATE_VMA_SELECT_DEVICE_PRIVATE, }; unsigned long i, npages = npages_in_range(start, end); unsigned long own_pages = 0, migrated_pages = 0; diff --git a/drivers/gpu/drm/drm_suballoc.c b/drivers/gpu/drm/drm_suballoc.c index e44ad39e310c..b74277bbc14b 100644 --- a/drivers/gpu/drm/drm_suballoc.c +++ b/drivers/gpu/drm/drm_suballoc.c @@ -293,45 +293,66 @@ static bool drm_suballoc_next_hole(struct drm_suballoc_manager *sa_manager, } /** - * drm_suballoc_new() - Make a suballocation. + * drm_suballoc_alloc() - Allocate uninitialized suballoc object. + * @gfp: gfp flags used for memory allocation. + * + * Allocate memory for an uninitialized suballoc object. Intended usage is + * allocate memory for suballoc object outside of a reclaim tainted context + * and then be initialized at a later time in a reclaim tainted context. + * + * @drm_suballoc_free() should be used to release the memory if returned + * suballoc object is in uninitialized state. + * + * Return: a new uninitialized suballoc object, or an ERR_PTR(-ENOMEM). + */ +struct drm_suballoc *drm_suballoc_alloc(gfp_t gfp) +{ + struct drm_suballoc *sa; + + sa = kmalloc_obj(*sa, gfp); + if (!sa) + return ERR_PTR(-ENOMEM); + + sa->manager = NULL; + + return sa; +} +EXPORT_SYMBOL(drm_suballoc_alloc); + +/** + * drm_suballoc_insert() - Initialize a suballocation and insert a hole. * @sa_manager: pointer to the sa_manager + * @sa: The struct drm_suballoc. 
 * @size: number of bytes we want to suballocate. - * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but - * the argument is provided for suballocations from reclaim context or - * where the caller wants to avoid pipelining rather than wait for - * reclaim. * @intr: Whether to perform waits interruptible. This should typically * always be true, unless the caller needs to propagate a * non-interruptible context from above layers. * @align: Alignment. Must not exceed the default manager alignment. * If @align is zero, then the manager alignment is used. * - * Try to make a suballocation of size @size, which will be rounded - * up to the alignment specified in specified in drm_suballoc_manager_init(). + * Try to make a suballocation on a pre-allocated suballoc object of size @size, + * which will be rounded up to the alignment specified in + * drm_suballoc_manager_init(). * - * Return: a new suballocated bo, or an ERR_PTR. + * Return: zero on success, errno on failure. 
*/ -struct drm_suballoc * -drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, - gfp_t gfp, bool intr, size_t align) +int drm_suballoc_insert(struct drm_suballoc_manager *sa_manager, + struct drm_suballoc *sa, size_t size, + bool intr, size_t align) { struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES]; unsigned int tries[DRM_SUBALLOC_MAX_QUEUES]; unsigned int count; int i, r; - struct drm_suballoc *sa; if (WARN_ON_ONCE(align > sa_manager->align)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (WARN_ON_ONCE(size > sa_manager->size || !size)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (!align) align = sa_manager->align; - sa = kmalloc_obj(*sa, gfp); - if (!sa) - return ERR_PTR(-ENOMEM); sa->manager = sa_manager; sa->fence = NULL; INIT_LIST_HEAD(&sa->olist); @@ -348,7 +369,7 @@ drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, if (drm_suballoc_try_alloc(sa_manager, sa, size, align)) { spin_unlock(&sa_manager->wq.lock); - return sa; + return 0; } /* see if we can skip over some allocations */ @@ -385,8 +406,48 @@ drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, } while (!r); spin_unlock(&sa_manager->wq.lock); - kfree(sa); - return ERR_PTR(r); + sa->manager = NULL; + return r; +} +EXPORT_SYMBOL(drm_suballoc_insert); + +/** + * drm_suballoc_new() - Make a suballocation. + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to suballocate. + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but + * the argument is provided for suballocations from reclaim context or + * where the caller wants to avoid pipelining rather than wait for + * reclaim. + * @intr: Whether to perform waits interruptible. This should typically + * always be true, unless the caller needs to propagate a + * non-interruptible context from above layers. + * @align: Alignment. Must not exceed the default manager alignment. + * If @align is zero, then the manager alignment is used. 
 + * + * Try to make a suballocation of size @size, which will be rounded + * up to the alignment specified in drm_suballoc_manager_init(). + * + * Return: a new suballocated bo, or an ERR_PTR. + */ +struct drm_suballoc * +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, + gfp_t gfp, bool intr, size_t align) +{ + struct drm_suballoc *sa; + int err; + + sa = drm_suballoc_alloc(gfp); + if (IS_ERR(sa)) + return sa; + + err = drm_suballoc_insert(sa_manager, sa, size, intr, align); + if (err) { + drm_suballoc_free(sa, NULL); + return ERR_PTR(err); + } + + return sa; } EXPORT_SYMBOL(drm_suballoc_new); @@ -405,6 +466,11 @@ void drm_suballoc_free(struct drm_suballoc *suballoc, if (!suballoc) return; + if (!suballoc->manager) { + kfree(suballoc); + return; + } + sa_manager = suballoc->manager; spin_lock(&sa_manager->wq.lock); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 01e0ae01fc50..c32d65727642 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1500,6 +1500,7 @@ static const struct { INTEL_PTL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), INTEL_WCL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), INTEL_NVLS_IDS(INTEL_DISPLAY_DEVICE, &nvl_desc), + INTEL_NVLP_IDS(INTEL_DISPLAY_DEVICE, &nvl_desc), }; static const struct { diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b39cbb756232..2062602c1bf3 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -74,6 +74,7 @@ xe-y += xe_bb.o \ xe_guc_log.o \ xe_guc_pagefault.o \ xe_guc_pc.o \ + xe_guc_rc.o \ xe_guc_submit.o \ xe_guc_tlb_inval.o \ xe_heci_gsc.o \ diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d2c4e94180fa..e1d29b6ba043 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -256,7 +256,7 @@ static int 
__xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, size = intel_rotation_info_size(&view->rotated) * XE_PAGE_SIZE; pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); - vma->node = xe_ggtt_node_insert_transform(ggtt, bo, pte, + vma->node = xe_ggtt_insert_node_transform(ggtt, bo, pte, ALIGN(size, align), align, view->type == I915_GTT_VIEW_NORMAL ? NULL : write_ggtt_rotated_node, @@ -352,8 +352,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) || - vma->bo->ggtt_node[tile_id] != vma->node) + else if (vma->bo->ggtt_node[tile_id] != vma->node) xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 457881af8af9..4d854c85e588 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -55,6 +55,7 @@ #define PIPELINE_SELECT GFXPIPE_SINGLE_DW_CMD(0x1, 0x4) #define CMD_3DSTATE_DRAWING_RECTANGLE_FAST GFXPIPE_3D_CMD(0x0, 0x0) +#define CMD_3DSTATE_CUSTOM_SAMPLE_PATTERN GFXPIPE_3D_CMD(0x0, 0x2) #define CMD_3DSTATE_CLEAR_PARAMS GFXPIPE_3D_CMD(0x0, 0x4) #define CMD_3DSTATE_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x5) #define CMD_3DSTATE_STENCIL_BUFFER GFXPIPE_3D_CMD(0x0, 0x6) @@ -138,8 +139,16 @@ #define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82) #define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83) #define CMD_3DSTATE_COARSE_PIXEL GFXPIPE_3D_CMD(0x0, 0x89) +#define CMD_3DSTATE_MESH_SHADER_DATA_EXT GFXPIPE_3D_CMD(0x0, 0x8A) +#define CMD_3DSTATE_TASK_SHADER_DATA_EXT GFXPIPE_3D_CMD(0x0, 0x8B) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC_2 GFXPIPE_3D_CMD(0x0, 0x8D) +#define CMD_3DSTATE_CC_STATE_POINTERS_2 GFXPIPE_3D_CMD(0x0, 0x8E) +#define CMD_3DSTATE_SCISSOR_STATE_POINTERS_2 GFXPIPE_3D_CMD(0x0, 
0x8F) +#define CMD_3DSTATE_BLEND_STATE_POINTERS_2 GFXPIPE_3D_CMD(0x0, 0xA0) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_2 GFXPIPE_3D_CMD(0x0, 0xA1) #define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0) +#define CMD_3DSTATE_URB_MEMORY GFXPIPE_3D_CMD(0x1, 0x1) #define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4) #define CMD_3DSTATE_POLY_STIPPLE_OFFSET GFXPIPE_3D_CMD(0x1, 0x6) #define CMD_3DSTATE_POLY_STIPPLE_PATTERN GFXPIPE_3D_CMD(0x1, 0x7) @@ -160,5 +169,6 @@ #define CMD_3DSTATE_SUBSLICE_HASH_TABLE GFXPIPE_3D_CMD(0x1, 0x1F) #define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS GFXPIPE_3D_CMD(0x1, 0x20) #define CMD_3DSTATE_PTBR_TILE_PASS_INFO GFXPIPE_3D_CMD(0x1, 0x22) +#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTER_2 GFXPIPE_3D_CMD(0x1, 0xA0) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 24fc64fc832e..66ddad767ad4 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -58,7 +58,7 @@ #define MCR_SLICE(slice) REG_FIELD_PREP(MCR_SLICE_MASK, slice) #define MCR_SUBSLICE_MASK REG_GENMASK(26, 24) #define MCR_SUBSLICE(subslice) REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice) -#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_GROUPID REG_GENMASK(12, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) #define PS_INVOCATION_COUNT XE_REG(0x2348) @@ -100,6 +100,9 @@ #define VE1_AUX_INV XE_REG(0x42b8) #define AUX_INV REG_BIT(0) +#define GAMSTLB_CTRL2 XE_REG_MCR(0x4788) +#define STLB_SINGLE_BANK_MODE REG_BIT(11) + #define XE2_LMEM_CFG XE_REG(0x48b0) #define XE2_GAMWALK_CTRL 0x47e4 @@ -107,6 +110,9 @@ #define XE2_GAMWALK_CTRL_3D XE_REG_MCR(XE2_GAMWALK_CTRL) #define EN_CMP_1WCOH_GW REG_BIT(14) +#define MMIOATSREQLIMIT_GAM_WALK_3D XE_REG_MCR(0x47f8) +#define DIS_ATS_WRONLY_PG REG_BIT(18) + #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) @@ -125,6 +131,7 @@ #define VS_HIT_MAX_VALUE_MASK REG_GENMASK(25, 20) #define 
DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16) #define DIS_MESH_AUTOSTRIP REG_BIT(15) +#define DIS_TE_PATCH_CTRL REG_BIT(4) #define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED) #define DIS_PARTIAL_AUTOSTRIP REG_BIT(9) @@ -169,6 +176,7 @@ #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) #define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) +#define HW_FILTERING REG_BIT(5) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) #define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED) @@ -210,6 +218,9 @@ #define GSCPSMI_BASE XE_REG(0x880c) +#define CCCHKNREG2 XE_REG_MCR(0x881c) +#define LOCALITYDIS REG_BIT(7) + #define CCCHKNREG1 XE_REG_MCR(0x8828) #define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) @@ -253,6 +264,8 @@ #define XE2_GT_COMPUTE_DSS_2 XE_REG(0x914c) #define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150) #define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154) +#define XE3P_XPC_GT_GEOMETRY_DSS_3 XE_REG(0x915c) +#define XE3P_XPC_GT_COMPUTE_DSS_3 XE_REG(0x9160) #define SERVICE_COPY_ENABLE XE_REG(0x9170) #define FUSE_SERVICE_COPY_ENABLE_MASK REG_GENMASK(7, 0) @@ -367,6 +380,7 @@ #define FORCEWAKE_RENDER XE_REG(0xa278) #define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0) +#define GSC_AWAKE_STATUS REG_BIT(8) #define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4) #define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3) #define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2) @@ -420,6 +434,8 @@ #define LSN_DIM_Z_WGT(value) REG_FIELD_PREP(LSN_DIM_Z_WGT_MASK, value) #define L3SQCREG2 XE_REG_MCR(0xb104) +#define L3_SQ_DISABLE_COAMA_2WAY_COH REG_BIT(30) +#define L3_SQ_DISABLE_COAMA REG_BIT(22) #define COMPMEMRD256BOVRFETCHEN REG_BIT(20) #define L3SQCREG3 XE_REG_MCR(0xb108) @@ -459,6 +475,8 @@ #define FORCE_MISS_FTLB REG_BIT(3) #define XEHP_GAMSTLB_CTRL XE_REG_MCR(0xcf4c) +#define BANK_HASH_MODE REG_GENMASK(27, 26) +#define BANK_HASH_4KB_MODE REG_FIELD_PREP(BANK_HASH_MODE, 0x3) #define CONTROL_BLOCK_CLKGATE_DIS 
REG_BIT(12) #define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) #define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) @@ -550,11 +568,16 @@ #define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32) +#define SAMPLER_LD_LSC_DISABLE REG_BIT(45 - 32) #define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) +#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32) + +#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0) +#define CPSS_AWARE_DIS REG_BIT(3) #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 87984713dd12..5faac8316b66 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -40,6 +40,9 @@ #define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) #define GS_MIA_IN_RESET REG_BIT(0) +#define BOOT_HASH_CHK XE_REG(0xc010) +#define GUC_BOOT_UKERNEL_VALID REG_BIT(31) + #define GUC_HEADER_INFO XE_REG(0xc014) #define GUC_WOPCM_SIZE XE_REG(0xc050) @@ -83,7 +86,12 @@ #define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) #define HUC_LOADING_AGENT_GUC REG_BIT(1) #define GUC_WOPCM_OFFSET_VALID REG_BIT(0) + +#define GUC_SRAM_STATUS XE_REG(0xc398) +#define GUC_SRAM_HANDLING_MASK REG_GENMASK(8, 7) + #define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) +#define GUC_IDLE_FLOW_DISABLE REG_BIT(31) #define GUC_PMTIMESTAMP_LO XE_REG(0xc3e8) #define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec) diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c index 42bfc4bcfbcf..305dbd4e5d1a 100644 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c 
+++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c @@ -11,14 +11,26 @@ #include "xe_pci_test.h" #define TEST_MAX_VFS 63 +#define TEST_VRAM 0x37a800000ull static void pf_set_admin_mode(struct xe_device *xe, bool enable) { /* should match logic of xe_sriov_pf_admin_only() */ - xe->info.probe_display = !enable; + xe->sriov.pf.admin_only = enable; KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe)); } +static void pf_set_usable_vram(struct xe_device *xe, u64 usable) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + struct kunit *test = kunit_get_current_test(); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); + xe->mem.vram->usable_size = usable; + tile->mem.vram->usable_size = usable; + KUNIT_ASSERT_EQ(test, usable, xe_vram_region_usable_size(tile->mem.vram)); +} + static const void *num_vfs_gen_param(struct kunit *test, const void *prev, char *desc) { unsigned long next = 1 + (unsigned long)prev; @@ -34,9 +46,11 @@ static int pf_gt_config_test_init(struct kunit *test) { struct xe_pci_fake_data fake = { .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* any random platform with SR-IOV */ + .platform = XE_BATTLEMAGE, /* any random DGFX platform with SR-IOV */ .subplatform = XE_SUBPLATFORM_NONE, + .graphics_verx100 = 2001, }; + struct xe_vram_region *vram; struct xe_device *xe; struct xe_gt *gt; @@ -50,6 +64,19 @@ static int pf_gt_config_test_init(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt); test->priv = gt; + /* pretend it has some VRAM */ + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); + vram = kunit_kzalloc(test, sizeof(*vram), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vram); + vram->usable_size = TEST_VRAM; + xe->mem.vram = vram; + xe->tiles[0].mem.vram = vram; + + /* pretend we have a valid LMTT */ + KUNIT_ASSERT_TRUE(test, xe_device_has_lmtt(xe)); + KUNIT_ASSERT_GE(test, GRAPHICS_VERx100(xe), 1260); + xe->tiles[0].sriov.pf.lmtt.ops = &lmtt_ml_ops; + /* pretend it can support up to 63 VFs */ 
xe->sriov.pf.device_total_vfs = TEST_MAX_VFS; xe->sriov.pf.driver_max_vfs = TEST_MAX_VFS; @@ -189,13 +216,80 @@ static void fair_ggtt(struct kunit *test) KUNIT_ASSERT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, num_vfs)); } +static const u64 vram_sizes[] = { + SZ_4G - SZ_512M, + SZ_8G + SZ_4G - SZ_512M, + SZ_16G - SZ_512M, + SZ_32G - SZ_512M, + SZ_64G - SZ_512M, + TEST_VRAM, +}; + +static void u64_param_get_desc(const u64 *p, char *desc) +{ + string_get_size(*p, 1, STRING_UNITS_2, desc, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(vram_size, vram_sizes, u64_param_get_desc); + +static void fair_vram_1vf(struct kunit *test) +{ + const u64 usable = *(const u64 *)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + pf_set_usable_vram(xe, usable); + + KUNIT_EXPECT_NE(test, 0, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_GE(test, usable, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_lmem(gt, 1))); + KUNIT_EXPECT_GE(test, usable - pf_profile_fair_lmem(gt, 1), pf_profile_fair_lmem(gt, 1)); +} + +static void fair_vram_1vf_admin_only(struct kunit *test) +{ + const u64 usable = *(const u64 *)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, true); + pf_set_usable_vram(xe, usable); + + KUNIT_EXPECT_NE(test, 0, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_GE(test, usable, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_LT(test, usable - pf_profile_fair_lmem(gt, 1), pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_lmem(gt, 1), SZ_1G)); +} + +static void fair_vram(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + u64 alignment = pf_get_lmem_alignment(gt); + char size[10]; + + pf_set_admin_mode(xe, false); + + string_get_size(pf_profile_fair_lmem(gt, num_vfs), 1, 
STRING_UNITS_2, size, sizeof(size)); + kunit_info(test, "fair %s %llx\n", size, pf_profile_fair_lmem(gt, num_vfs)); + + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_lmem(gt, num_vfs))); + KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_lmem(gt, num_vfs), alignment)); + KUNIT_EXPECT_GE(test, TEST_VRAM, num_vfs * pf_profile_fair_lmem(gt, num_vfs)); +} + static struct kunit_case pf_gt_config_test_cases[] = { KUNIT_CASE(fair_contexts_1vf), KUNIT_CASE(fair_doorbells_1vf), KUNIT_CASE(fair_ggtt_1vf), + KUNIT_CASE_PARAM(fair_vram_1vf, vram_size_gen_params), + KUNIT_CASE_PARAM(fair_vram_1vf_admin_only, vram_size_gen_params), KUNIT_CASE_PARAM(fair_contexts, num_vfs_gen_param), KUNIT_CASE_PARAM(fair_doorbells, num_vfs_gen_param), KUNIT_CASE_PARAM(fair_ggtt, num_vfs_gen_param), + KUNIT_CASE_PARAM(fair_vram, num_vfs_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index acddbedcf17c..51e1e04001ac 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -38,12 +38,8 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * if (flags & XE_BO_FLAG_GGTT) { struct xe_ggtt *ggtt = tile->mem.ggtt; - bo->ggtt_node[tile->id] = xe_ggtt_node_init(ggtt); + bo->ggtt_node[tile->id] = xe_ggtt_insert_node(ggtt, xe_bo_size(bo), SZ_4K); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); - - KUNIT_ASSERT_EQ(test, 0, - xe_ggtt_node_insert(bo->ggtt_node[tile->id], - xe_bo_size(bo), SZ_4K)); } return bo; diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c index 3b213fcae916..ea8ca03effa9 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c @@ -48,6 +48,38 @@ struct g2g_test_payload { u32 seqno; }; +static int slot_index_from_gts(struct xe_gt *tx_gt, struct xe_gt *rx_gt) +{ + struct xe_device *xe = gt_to_xe(tx_gt); + int idx = 0, 
found = 0, id, tx_idx, rx_idx; + struct xe_gt *gt; + struct kunit *test = kunit_get_current_test(); + + for (id = 0; id < xe->info.tile_count * xe->info.max_gt_per_tile; id++) { + gt = xe_device_get_gt(xe, id); + if (!gt) + continue; + if (gt == tx_gt) { + tx_idx = idx; + found++; + } + if (gt == rx_gt) { + rx_idx = idx; + found++; + } + + if (found == 2) + break; + + idx++; + } + + if (found != 2) + KUNIT_FAIL(test, "GT index not found"); + + return (tx_idx * xe->info.gt_count) + rx_idx; +} + static void g2g_test_send(struct kunit *test, struct xe_guc *guc, u32 far_tile, u32 far_dev, struct g2g_test_payload *payload) @@ -163,7 +195,7 @@ int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) goto done; } - idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; + idx = slot_index_from_gts(tx_gt, rx_gt); if (xe->g2g_test_array[idx] != payload->seqno - 1) { xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", @@ -180,13 +212,17 @@ done: return ret; } +#define G2G_WAIT_TIMEOUT_MS 100 +#define G2G_WAIT_POLL_MS 1 + /* * Send the given seqno from all GuCs to all other GuCs in tile/GT order */ static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) { struct xe_gt *near_gt, *far_gt; - int i, j; + int i, j, waited; + u32 idx; for_each_gt(near_gt, xe, i) { u32 near_tile = gt_to_tile(near_gt)->id; @@ -205,6 +241,27 @@ static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqn payload.rx_dev = far_dev; payload.rx_tile = far_tile; payload.seqno = seqno; + + /* Calculate idx for event-based wait */ + idx = slot_index_from_gts(near_gt, far_gt); + waited = 0; + + /* + * Wait for previous seqno to be acknowledged before sending, + * to avoid queuing too many back-to-back messages and + * causing a test timeout. 
Actual correctness of message + * will be checked later in xe_guc_g2g_test_notification() + */ + while (xe->g2g_test_array[idx] != (seqno - 1)) { + msleep(G2G_WAIT_POLL_MS); + waited += G2G_WAIT_POLL_MS; + if (waited >= G2G_WAIT_TIMEOUT_MS) { + kunit_info(test, "Timeout waiting! tx gt: %d, rx gt: %d\n", + near_gt->info.id, far_gt->info.id); + break; + } + } + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); } } diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 4d10a7e2b570..acf5a5555130 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -19,6 +19,8 @@ static void check_graphics_ip(struct kunit *test) const struct xe_ip *param = test->param_value; const struct xe_graphics_desc *graphics = param->desc; u64 mask = graphics->hw_engine_mask; + u8 fuse_regs = graphics->num_geometry_xecore_fuse_regs + + graphics->num_compute_xecore_fuse_regs; /* RCS, CCS, and BCS engines are allowed on the graphics IP */ mask &= ~(XE_HW_ENGINE_RCS_MASK | @@ -27,6 +29,12 @@ static void check_graphics_ip(struct kunit *test) /* Any remaining engines are an error */ KUNIT_ASSERT_EQ(test, mask, 0); + + /* + * All graphics IP should have at least one geometry and/or compute + * XeCore fuse register. 
+ */ + KUNIT_ASSERT_GE(test, fuse_regs, 1); } static void check_media_ip(struct kunit *test) diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index d2255a59e58f..e5a0f985a700 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -322,7 +322,8 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test) count_rtp_entries++; xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr); + xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, + reg_sr, false); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 4749aa7f9466..b896b6f6615c 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -59,16 +59,51 @@ err: return ERR_PTR(err); } -struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, - enum xe_sriov_vf_ccs_rw_ctxs ctx_id) +/** + * xe_bb_alloc() - Allocate a new batch buffer structure + * @gt: the &xe_gt + * + * Allocates and initializes a new xe_bb structure with an associated + * uninitialized suballoc object. + * + * Returns: Batch buffer structure or an ERR_PTR(-ENOMEM). + */ +struct xe_bb *xe_bb_alloc(struct xe_gt *gt) { struct xe_bb *bb = kmalloc_obj(*bb); - struct xe_device *xe = gt_to_xe(gt); - struct xe_sa_manager *bb_pool; int err; if (!bb) return ERR_PTR(-ENOMEM); + + bb->bo = xe_sa_bo_alloc(GFP_KERNEL); + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + return bb; + +err: + kfree(bb); + return ERR_PTR(err); +} + +/** + * xe_bb_init() - Initialize a batch buffer with memory from a sub-allocator pool + * @bb: Batch buffer structure to initialize + * @bb_pool: Suballoc memory pool to allocate from + * @dwords: Number of dwords to be allocated + * + * Initializes the batch buffer by allocating memory from the specified + * suballoc pool. 
+ * + * Return: 0 on success, negative error code on failure. + */ +int xe_bb_init(struct xe_bb *bb, struct xe_sa_manager *bb_pool, u32 dwords) +{ + int err; + /* * We need to allocate space for the requested number of dwords & * one additional MI_BATCH_BUFFER_END dword. Since the whole SA @@ -76,22 +111,14 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, * is not over written when the last chunk of SA is allocated for BB. * So, this extra DW acts as a guard here. */ - - bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; - bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); - - if (IS_ERR(bb->bo)) { - err = PTR_ERR(bb->bo); - goto err; - } + err = xe_sa_bo_init(bb_pool, bb->bo, 4 * (dwords + 1)); + if (err) + return err; bb->cs = xe_sa_bo_cpu_addr(bb->bo); bb->len = 0; - return bb; -err: - kfree(bb); - return ERR_PTR(err); + return 0; } static struct xe_sched_job * diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index 2a8adc9a6dee..231870b24c2f 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -12,12 +12,12 @@ struct dma_fence; struct xe_gt; struct xe_exec_queue; +struct xe_sa_manager; struct xe_sched_job; -enum xe_sriov_vf_ccs_rw_ctxs; struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); -struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, - enum xe_sriov_vf_ccs_rw_ctxs ctx_id); +struct xe_bb *xe_bb_alloc(struct xe_gt *gt); +int xe_bb_init(struct xe_bb *bb, struct xe_sa_manager *bb_pool, u32 dwords); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f72841807d71..8ff193600443 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -512,8 +512,8 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, /* * Display scanout is always non-coherent with the CPU cache. 
* - * For Xe_LPG and beyond, PPGTT PTE lookups are also - * non-coherent and require a CPU:WC mapping. + * For Xe_LPG and beyond up to NVL-P (excluding), PPGTT PTE + * lookups are also non-coherent and require a CPU:WC mapping. */ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE)) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index c59b1414df22..32102600a148 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -15,6 +15,7 @@ #include "instructions/xe_mi_commands.h" #include "xe_configfs.h" +#include "xe_defaults.h" #include "xe_gt_types.h" #include "xe_hw_engine_types.h" #include "xe_module.h" @@ -263,6 +264,7 @@ struct xe_config_group_device { bool enable_psmi; struct { unsigned int max_vfs; + bool admin_only_pf; } sriov; } config; @@ -280,7 +282,8 @@ static const struct xe_config_device device_defaults = { .survivability_mode = false, .enable_psmi = false, .sriov = { - .max_vfs = UINT_MAX, + .max_vfs = XE_DEFAULT_MAX_VFS, + .admin_only_pf = XE_DEFAULT_ADMIN_ONLY_PF, }, }; @@ -830,6 +833,7 @@ static void xe_config_device_release(struct config_item *item) mutex_destroy(&dev->lock); + kfree(dev->config.ctx_restore_mid_bb[0].cs); kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); } @@ -896,10 +900,40 @@ static ssize_t sriov_max_vfs_store(struct config_item *item, const char *page, s return len; } +static ssize_t sriov_admin_only_pf_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + + guard(mutex)(&dev->lock); + + return sprintf(page, "%s\n", str_yes_no(dev->config.sriov.admin_only_pf)); +} + +static ssize_t sriov_admin_only_pf_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + bool admin_only_pf; + int ret; + + guard(mutex)(&dev->lock); + + if 
(is_bound(dev)) + return -EBUSY; + + ret = kstrtobool(page, &admin_only_pf); + if (ret) + return ret; + + dev->config.sriov.admin_only_pf = admin_only_pf; + return len; +} + CONFIGFS_ATTR(sriov_, max_vfs); +CONFIGFS_ATTR(sriov_, admin_only_pf); static struct configfs_attribute *xe_config_sriov_attrs[] = { &sriov_attr_max_vfs, + &sriov_attr_admin_only_pf, NULL, }; @@ -910,6 +944,8 @@ static bool xe_config_sriov_is_visible(struct config_item *item, if (attr == &sriov_attr_max_vfs && dev->mode != XE_SRIOV_MODE_PF) return false; + if (attr == &sriov_attr_admin_only_pf && dev->mode != XE_SRIOV_MODE_PF) + return false; return true; } @@ -1063,6 +1099,7 @@ static void dump_custom_dev_config(struct pci_dev *pdev, PRI_CUSTOM_ATTR("%llx", engines_allowed); PRI_CUSTOM_ATTR("%d", enable_psmi); PRI_CUSTOM_ATTR("%d", survivability_mode); + PRI_CUSTOM_ATTR("%u", sriov.admin_only_pf); #undef PRI_CUSTOM_ATTR } @@ -1241,6 +1278,32 @@ u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, } #ifdef CONFIG_PCI_IOV +/** + * xe_configfs_admin_only_pf() - Get PF's operational mode. + * @pdev: the &pci_dev device + * + * Find the configfs group that belongs to the PCI device and return a flag + * whether the PF driver should be dedicated for VFs management only. + * + * If configfs group is not present, use driver's default value. + * + * Return: true if PF driver is dedicated for VFs administration only. 
+ */ +bool xe_configfs_admin_only_pf(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + bool admin_only_pf; + + if (!dev) + return XE_DEFAULT_ADMIN_ONLY_PF; + + scoped_guard(mutex, &dev->lock) + admin_only_pf = dev->config.sriov.admin_only_pf; + + config_group_put(&dev->group); + + return admin_only_pf; +} /** * xe_configfs_get_max_vfs() - Get number of VFs that could be managed * @pdev: the &pci_dev device diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index f3683bc7eb90..07d62bf0c152 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -8,7 +8,9 @@ #include #include -#include +#include "xe_defaults.h" +#include "xe_hw_engine_types.h" +#include "xe_module.h" struct pci_dev; @@ -29,6 +31,7 @@ u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, const u32 **cs); #ifdef CONFIG_PCI_IOV unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev); +bool xe_configfs_admin_only_pf(struct pci_dev *pdev); #endif #else static inline int xe_configfs_init(void) { return 0; } @@ -45,7 +48,16 @@ static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class class, const u32 **cs) { return 0; } -static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) { return UINT_MAX; } +#ifdef CONFIG_PCI_IOV +static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) +{ + return xe_modparam.max_vfs; +} +static inline bool xe_configfs_admin_only_pf(struct pci_dev *pdev) +{ + return XE_DEFAULT_ADMIN_ONLY_PF; +} +#endif #endif #endif diff --git a/drivers/gpu/drm/xe/xe_defaults.h b/drivers/gpu/drm/xe/xe_defaults.h new file mode 100644 index 000000000000..c8ae1d5f3d60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_defaults.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ +#ifndef 
_XE_DEFAULTS_H_ +#define _XE_DEFAULTS_H_ + +#include "xe_device_types.h" + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define XE_DEFAULT_GUC_LOG_LEVEL 3 +#else +#define XE_DEFAULT_GUC_LOG_LEVEL 1 +#endif + +#define XE_DEFAULT_PROBE_DISPLAY IS_ENABLED(CONFIG_DRM_XE_DISPLAY) +#define XE_DEFAULT_VRAM_BAR_SIZE 0 +#define XE_DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define XE_DEFAULT_MAX_VFS ~0 +#define XE_DEFAULT_MAX_VFS_STR "unlimited" +#define XE_DEFAULT_ADMIN_ONLY_PF false +#define XE_DEFAULT_WEDGED_MODE XE_WEDGED_MODE_UPON_CRITICAL_ERROR +#define XE_DEFAULT_WEDGED_MODE_STR "upon-critical-error" +#define XE_DEFAULT_SVM_NOTIFIER_SIZE 512 + +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index cf41bb6d2172..558a1a9841a0 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -356,7 +356,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, xe_engine_snapshot_capture_for_queue(q); - queue_work(system_unbound_wq, &ss->work); + queue_work(system_dfl_wq, &ss->work); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 52ee24e9cd3a..3462645ca13c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -26,6 +26,7 @@ #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_debugfs.h" +#include "xe_defaults.h" #include "xe_devcoredump.h" #include "xe_device_sysfs.h" #include "xe_dma_buf.h" @@ -455,16 +456,16 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->drm.anon_inode->i_mapping, xe->drm.vma_offset_manager, 0); if (WARN_ON(err)) - goto err; + return ERR_PTR(err); xe_bo_dev_init(&xe->bo_device); err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); if (err) - goto err; + return ERR_PTR(err); err = xe_shrinker_create(xe); if (err) - goto err; + return ERR_PTR(err); xe->info.devid = pdev->device; xe->info.revid = pdev->revision; @@ -474,7 +475,7 @@ struct 
xe_device *xe_device_create(struct pci_dev *pdev, err = xe_irq_init(xe); if (err) - goto err; + return ERR_PTR(err); xe_validation_device_init(&xe->val); @@ -484,7 +485,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err = xe_pagemap_shrinker_create(xe); if (err) - goto err; + return ERR_PTR(err); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); @@ -503,13 +504,13 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err = xe_bo_pinned_init(xe); if (err) - goto err; + return ERR_PTR(err); xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", WQ_MEM_RECLAIM); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); - xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); - xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); + xe->unordered_wq = alloc_workqueue("xe-unordered-wq", WQ_PERCPU, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", WQ_PERCPU, 0); if (!xe->ordered_wq || !xe->unordered_wq || !xe->preempt_fence_wq || !xe->destroy_wq) { /* @@ -517,18 +518,14 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, * drmm_add_action_or_reset register above */ drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); - err = -ENOMEM; - goto err; + return ERR_PTR(-ENOMEM); } err = drmm_mutex_init(&xe->drm, &xe->pmt.lock); if (err) - goto err; + return ERR_PTR(err); return xe; - -err: - return ERR_PTR(err); } ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */ @@ -743,7 +740,7 @@ int xe_device_probe_early(struct xe_device *xe) assert_lmem_ready(xe); xe->wedged.mode = xe_device_validate_wedged_mode(xe, xe_modparam.wedged_mode) ? 
- XE_WEDGED_MODE_DEFAULT : xe_modparam.wedged_mode; + XE_DEFAULT_WEDGED_MODE : xe_modparam.wedged_mode; drm_dbg(&xe->drm, "wedged_mode: setting mode (%u) %s\n", xe->wedged.mode, xe_wedged_mode_to_string(xe->wedged.mode)); @@ -1311,7 +1308,8 @@ void xe_device_declare_wedged(struct xe_device *xe) xe->needs_flr_on_fini = true; drm_err(&xe->drm, "CRITICAL: Xe has declared device %s as wedged.\n" - "IOCTLs and executions are blocked. Only a rebind may clear the failure\n" + "IOCTLs and executions are blocked.\n" + "For recovery procedure, refer to https://docs.kernel.org/gpu/drm-uapi.html#device-wedging\n" "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); } @@ -1374,3 +1372,28 @@ const char *xe_wedged_mode_to_string(enum xe_wedged_mode mode) return ""; } } + +/** + * xe_device_asid_to_vm() - Find VM from ASID + * @xe: the &xe_device + * @asid: Address space ID + * + * Find a VM from ASID and take a reference to VM which caller must drop. + * Reclaim safe. + * + * Return: VM on success, ERR_PTR on failure + */ +struct xe_vm *xe_device_asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3740143790db..39464650533b 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -12,6 +12,8 @@ #include "xe_gt_types.h" #include "xe_sriov.h" +struct xe_vm; + static inline struct xe_device *to_xe_device(const struct drm_device *dev) { return container_of(dev, struct xe_device, drm); @@ -60,13 +62,6 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } -/* - * Highest GT/tile count for any platform. Used only for memory allocation - * sizing. 
Any logic looping over GTs or mapping userspace GT IDs into GT - * structures should use the per-platform xe->info.max_gt_per_tile instead. - */ -#define XE_MAX_GT_PER_TILE 2 - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { struct xe_tile *tile; @@ -114,6 +109,11 @@ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) return xe_device_get_root_tile(xe)->primary_gt; } +static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) +{ + return &xe->tiles[0].mmio; +} + static inline bool xe_device_uc_enabled(struct xe_device *xe) { return !xe->info.force_execlist; @@ -204,6 +204,8 @@ int xe_is_injection_active(void); bool xe_is_xe_file(const struct file *file); +struct xe_vm *xe_device_asid_to_vm(struct xe_device *xe, u32 asid); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. This diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f689766adcb1..caa8f34a6744 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -15,9 +15,6 @@ #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" #include "xe_late_bind_fw_types.h" -#include "xe_lmtt_types.h" -#include "xe_memirq_types.h" -#include "xe_mert.h" #include "xe_oa_types.h" #include "xe_pagefault_types.h" #include "xe_platform_types.h" @@ -29,14 +26,13 @@ #include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" -#include "xe_tile_sriov_vf_types.h" +#include "xe_tile_types.h" #include "xe_validation.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR #endif -struct dram_info; struct drm_pagemap_shrinker; struct intel_display; struct intel_dg_nvm_dev; @@ -62,9 +58,6 @@ enum xe_wedged_mode { XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET = 2, }; -#define XE_WEDGED_MODE_DEFAULT XE_WEDGED_MODE_UPON_CRITICAL_ERROR 
-#define XE_WEDGED_MODE_DEFAULT_STR "upon-critical-error" - #define XE_BO_INVALID_OFFSET LONG_MAX #define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) @@ -79,6 +72,13 @@ enum xe_wedged_mode { #define XE_GT1 1 #define XE_MAX_TILES_PER_DEVICE (XE_GT1 + 1) +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ +#define XE_MAX_GT_PER_TILE 2 + #define XE_MAX_ASID (BIT(20)) #define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \ @@ -91,168 +91,6 @@ enum xe_wedged_mode { (_xe)->info.step.graphics >= (min_step) && \ (_xe)->info.step.graphics < (max_step)) -#define tile_to_xe(tile__) \ - _Generic(tile__, \ - const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ - struct xe_tile * : (tile__)->xe) - -/** - * struct xe_mmio - register mmio structure - * - * Represents an MMIO region that the CPU may use to access registers. A - * region may share its IO map with other regions (e.g., all GTs within a - * tile share the same map with their parent tile, but represent different - * subregions of the overall IO space). - */ -struct xe_mmio { - /** @tile: Backpointer to tile, used for tracing */ - struct xe_tile *tile; - - /** @regs: Map used to access registers. */ - void __iomem *regs; - - /** - * @sriov_vf_gt: Backpointer to GT. - * - * This pointer is only set for GT MMIO regions and only when running - * as an SRIOV VF structure - */ - struct xe_gt *sriov_vf_gt; - - /** - * @regs_size: Length of the register region within the map. - * - * The size of the iomap set in *regs is generally larger than the - * register mmio space since it includes unused regions and/or - * non-register regions such as the GGTT PTEs. 
- */ - size_t regs_size; - - /** @adj_limit: adjust MMIO address if address is below this value */ - u32 adj_limit; - - /** @adj_offset: offset to add to MMIO address when adjusting */ - u32 adj_offset; -}; - -/** - * struct xe_tile - hardware tile structure - * - * From a driver perspective, a "tile" is effectively a complete GPU, containing - * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM. - * - * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI - * device and designate one "root" tile as being responsible for external PCI - * communication. PCI BAR0 exposes the GGTT and MMIO register space for each - * tile in a stacked layout, and PCI BAR2 exposes the local memory associated - * with each tile similarly. Device-wide interrupts can be enabled/disabled - * at the root tile, and the MSTR_TILE_INTR register will report which tiles - * have interrupts that need servicing. - */ -struct xe_tile { - /** @xe: Backpointer to tile's PCI device */ - struct xe_device *xe; - - /** @id: ID of the tile */ - u8 id; - - /** - * @primary_gt: Primary GT - */ - struct xe_gt *primary_gt; - - /** - * @media_gt: Media GT - * - * Only present on devices with media version >= 13. - */ - struct xe_gt *media_gt; - - /** - * @mmio: MMIO info for a tile. - * - * Each tile has its own 16MB space in BAR0, laid out as: - * * 0-4MB: registers - * * 4MB-8MB: reserved - * * 8MB-16MB: global GTT - */ - struct xe_mmio mmio; - - /** @mem: memory management info for tile */ - struct { - /** - * @mem.kernel_vram: kernel-dedicated VRAM info for tile. - * - * Although VRAM is associated with a specific tile, it can - * still be accessed by all tiles' GTs. - */ - struct xe_vram_region *kernel_vram; - - /** - * @mem.vram: general purpose VRAM info for tile. - * - * Although VRAM is associated with a specific tile, it can - * still be accessed by all tiles' GTs. 
- */ - struct xe_vram_region *vram; - - /** @mem.ggtt: Global graphics translation table */ - struct xe_ggtt *ggtt; - - /** - * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated. - * - * Media GT shares a pool with its primary GT. - */ - struct xe_sa_manager *kernel_bb_pool; - - /** - * @mem.reclaim_pool: Pool for PRLs allocated. - * - * Only main GT has page reclaim list allocations. - */ - struct xe_sa_manager *reclaim_pool; - } mem; - - /** @sriov: tile level virtualization data */ - union { - struct { - /** @sriov.pf.lmtt: Local Memory Translation Table. */ - struct xe_lmtt lmtt; - } pf; - struct { - /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct xe_ggtt_node *ggtt_balloon[2]; - /** @sriov.vf.self_config: VF configuration data */ - struct xe_tile_sriov_vf_selfconfig self_config; - } vf; - } sriov; - - /** @memirq: Memory Based Interrupts. */ - struct xe_memirq memirq; - - /** @csc_hw_error_work: worker to report CSC HW errors */ - struct work_struct csc_hw_error_work; - - /** @pcode: tile's PCODE */ - struct { - /** @pcode.lock: protecting tile's PCODE mailbox data */ - struct mutex lock; - } pcode; - - /** @migrate: Migration helper for vram blits and clearing */ - struct xe_migrate *migrate; - - /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ - struct kobject *sysfs; - - /** @debugfs: debugfs directory associated with this tile */ - struct dentry *debugfs; - - /** @mert: MERT-related data */ - struct xe_mert mert; -}; - /** * struct xe_device - Top level struct of Xe device */ @@ -300,6 +138,8 @@ struct xe_device { u8 tile_count; /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ u8 max_gt_per_tile; + /** @info.multi_lrc_mask: bitmask of engine classes which support multi-lrc */ + u8 multi_lrc_mask; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ @@ -353,6 +193,8 @@ struct xe_device { u8 has_pre_prod_wa:1; /** @info.has_pxp: Device has 
PXP support */ u8 has_pxp:1; + /** @info.has_ctx_tlb_inval: Has context based TLB invalidations */ + u8 has_ctx_tlb_inval:1; /** @info.has_range_tlb_inval: Has range based TLB invalidations */ u8 has_range_tlb_inval:1; /** @info.has_soc_remapper_sysctrl: Has SoC remapper system controller */ @@ -559,10 +401,12 @@ struct xe_device { const struct xe_pat_table_entry *table; /** @pat.n_entries: Number of PAT entries */ int n_entries; - /** @pat.ats_entry: PAT entry for PCIe ATS responses */ + /** @pat.pat_ats: PAT entry for PCIe ATS responses */ const struct xe_pat_table_entry *pat_ats; - /** @pat.pta_entry: PAT entry for page table accesses */ - const struct xe_pat_table_entry *pat_pta; + /** @pat.pat_primary_pta: primary GT PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_primary_pta; + /** @pat.pat_media_pta: media GT PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_media_pta; u32 idx[__XE_CACHE_LEVEL_COUNT]; } pat; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ddae7fcfc97..2d0e73a6a6ee 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -152,8 +152,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) if (xe_exec_queue_is_multi_queue(q)) xe_exec_queue_group_cleanup(q); - if (q->vm) + if (q->vm) { + xe_vm_remove_exec_queue(q->vm, q); xe_vm_put(q->vm); + } if (q->xef) xe_file_put(q->xef); @@ -224,9 +226,12 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); + INIT_LIST_HEAD(&q->vm_exec_queue_link); INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); + spin_lock_init(&q->multi_queue.lock); + spin_lock_init(&q->lrc_lookup_lock); q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = 
hwe->eclass->sched_props.timeslice_us; @@ -266,6 +271,66 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } +static void xe_exec_queue_set_lrc(struct xe_exec_queue *q, struct xe_lrc *lrc, u16 idx) +{ + xe_assert(gt_to_xe(q->gt), idx < q->width); + + scoped_guard(spinlock, &q->lrc_lookup_lock) + q->lrc[idx] = lrc; +} + +/** + * xe_exec_queue_get_lrc() - Get the LRC from exec queue. + * @q: The exec queue instance. + * @idx: Index within multi-LRC array. + * + * Retrieves LRC of given index for the exec queue under lock + * and takes reference. + * + * Return: Pointer to LRC on success, error on failure, NULL on + * lookup failure. + */ +struct xe_lrc *xe_exec_queue_get_lrc(struct xe_exec_queue *q, u16 idx) +{ + struct xe_lrc *lrc; + + xe_assert(gt_to_xe(q->gt), idx < q->width); + + scoped_guard(spinlock, &q->lrc_lookup_lock) { + lrc = q->lrc[idx]; + if (lrc) + xe_lrc_get(lrc); + } + + return lrc; +} + +/** + * xe_exec_queue_lrc() - Get the LRC from exec queue. + * @q: The exec queue instance. + * + * Retrieves the primary LRC for the exec queue. Note that this function + * returns only the first LRC instance, even when multiple parallel LRCs + * are configured. This function does not increment reference count, + * so the reference can be just forgotten after use. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) +{ + return q->lrc[0]; +} + +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) { int i, err; @@ -303,38 +368,37 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) * from the moment vCPU resumes execution. 
*/ for (i = 0; i < q->width; ++i) { - struct xe_lrc *lrc; + struct xe_lrc *__lrc = NULL; + int marker; - xe_gt_sriov_vf_wait_valid_ggtt(q->gt); - lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, - xe_lrc_ring_size(), q->msix_vec, flags); - if (IS_ERR(lrc)) { - err = PTR_ERR(lrc); - goto err_lrc; - } + do { + struct xe_lrc *lrc; - /* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */ - WRITE_ONCE(q->lrc[i], lrc); + marker = xe_gt_sriov_vf_wait_valid_ggtt(q->gt); + + lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, + xe_lrc_ring_size(), q->msix_vec, flags); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); + goto err_lrc; + } + + xe_exec_queue_set_lrc(q, lrc, i); + + if (__lrc) + xe_lrc_put(__lrc); + __lrc = lrc; + + } while (marker != xe_vf_migration_fixups_complete_count(q->gt)); } return 0; err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); + __xe_exec_queue_fini(q); return err; } -static void __xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - q->ops->fini(q); - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); -} - struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -1180,6 +1244,11 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; + /* multi-lrc is only supported on select engine classes */ + if (XE_IOCTL_DBG(xe, args->width > 1 && + !(xe->info.multi_lrc_mask & BIT(hwe->class)))) + return -EOPNOTSUPP; + vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; @@ -1233,6 +1302,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q->xef = xe_file_get(xef); + if (eci[0].engine_class != DRM_XE_ENGINE_CLASS_VM_BIND) + xe_vm_add_exec_queue(vm, q); /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); @@ -1283,21 +1354,6 @@ int 
xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, return ret; } -/** - * xe_exec_queue_lrc() - Get the LRC from exec queue. - * @q: The exec_queue. - * - * Retrieves the primary LRC for the exec queue. Note that this function - * returns only the first LRC instance, even when multiple parallel LRCs - * are configured. - * - * Return: Pointer to LRC on success, error on failure - */ -struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) -{ - return q->lrc[0]; -} - /** * xe_exec_queue_is_lr() - Whether an exec_queue is long-running * @q: The exec_queue @@ -1657,14 +1713,14 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) for (i = 0; i < q->width; ++i) { struct xe_lrc *lrc; - /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ - lrc = READ_ONCE(q->lrc[i]); + lrc = xe_exec_queue_get_lrc(q, i); if (!lrc) continue; xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch); xe_lrc_update_hwctx_regs_with_address(lrc); err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch); + xe_lrc_put(lrc); if (err) break; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index c9e3a7c2d249..a82d99bd77bc 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -160,6 +160,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); +struct xe_lrc *xe_exec_queue_get_lrc(struct xe_exec_queue *q, u16 idx); /** * xe_exec_queue_idle_skip_suspend() - Can exec queue skip suspend diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index f1142e5d3e3e..a1f3938f4173 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -66,6 +66,8 @@ struct xe_exec_queue_group { bool sync_pending; /** @banned: Group banned */ bool banned; + /** 
@stopped: Group is stopped, protected by list_lock */ + bool stopped; }; /** @@ -159,8 +161,13 @@ struct xe_exec_queue { struct xe_exec_queue_group *group; /** @multi_queue.link: Link into group's secondary queues list */ struct list_head link; - /** @multi_queue.priority: Queue priority within the multi-queue group */ + /** + * @multi_queue.priority: Queue priority within the multi-queue group. + * It is protected by @multi_queue.lock. + */ enum xe_multi_queue_priority priority; + /** @multi_queue.lock: Lock for protecting certain members */ + spinlock_t lock; /** @multi_queue.pos: Position of queue within the multi-queue group */ u8 pos; /** @multi_queue.valid: Queue belongs to a multi queue group */ @@ -211,6 +218,9 @@ struct xe_exec_queue { struct dma_fence *last_fence; } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; + /** @vm_exec_queue_link: Link to track exec queue within a VM's list of exec queues. */ + struct list_head vm_exec_queue_link; + /** @pxp: PXP info tracking */ struct { /** @pxp.type: PXP session type used by this queue */ @@ -247,6 +257,11 @@ struct xe_exec_queue { u64 tlb_flush_seqno; /** @hw_engine_group_link: link into exec queues in the same hw engine group */ struct list_head hw_engine_group_link; + /** + * @lrc_lookup_lock: Lock for protecting lrc array access. Only used when + * running in parallel to queue creation is possible. 
+ */ + spinlock_t lrc_lookup_lock; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[] __counted_by(width); }; @@ -301,6 +316,8 @@ struct xe_exec_queue_ops { void (*resume)(struct xe_exec_queue *q); /** @reset_status: check exec queue reset status */ bool (*reset_status)(struct xe_exec_queue *q); + /** @active: check exec queue is active */ + bool (*active)(struct xe_exec_queue *q); }; #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 371983c94a1b..7e8a3a7db741 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -421,7 +421,7 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); - queue_work(system_unbound_wq, &q->execlist->destroy_async); + queue_work(system_dfl_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, @@ -468,6 +468,12 @@ static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q) return false; } +static bool execlist_exec_queue_active(struct xe_exec_queue *q) +{ + /* NIY */ + return false; +} + static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, @@ -480,6 +486,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .suspend_wait = execlist_exec_queue_suspend_wait, .resume = execlist_exec_queue_resume, .reset_status = execlist_exec_queue_reset_status, + .active = execlist_exec_queue_active, }; int xe_execlist_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 76e054f314ee..197e2197bd0a 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -148,12 +148,6 @@ static int domain_sleep_wait(struct xe_gt *gt, return __domain_wait(gt, domain, false); } 
-#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ - for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ - for_each_if((domain__ = ((fw__)->domains + \ - (ffs(tmp__) - 1))) && \ - domain__->reg_ctl.addr) - /** * xe_force_wake_get() : Increase the domain refcount * @fw: struct xe_force_wake @@ -266,3 +260,43 @@ void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref) xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n", str_plural(hweight_long(ack_fail)), ack_fail); } + +const char *xe_force_wake_domain_to_str(enum xe_force_wake_domain_id id) +{ + switch (id) { + case XE_FW_DOMAIN_ID_GT: + return "GT"; + case XE_FW_DOMAIN_ID_RENDER: + return "Render"; + case XE_FW_DOMAIN_ID_MEDIA: + return "Media"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX0: + return "VDBox0"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX1: + return "VDBox1"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX2: + return "VDBox2"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX3: + return "VDBox3"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX4: + return "VDBox4"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX5: + return "VDBox5"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX6: + return "VDBox6"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX7: + return "VDBox7"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX0: + return "VEBox0"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX1: + return "VEBox1"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX2: + return "VEBox2"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX3: + return "VEBox3"; + case XE_FW_DOMAIN_ID_GSC: + return "GSC"; + default: + return "Unknown"; + } +} diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 1e2198f6a007..e2721f205d6c 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -19,6 +19,17 @@ unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains); void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref); +const char *xe_force_wake_domain_to_str(enum 
xe_force_wake_domain_id id); + +#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ + for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ + for_each_if(((domain__) = ((fw__)->domains + \ + (ffs(tmp__) - 1))) && \ + (domain__)->reg_ctl.addr) + +#define for_each_fw_domain(domain__, fw__, tmp__) \ + for_each_fw_domain_masked((domain__), (fw__)->initialized_domains, (fw__), (tmp__)) + static inline int xe_force_wake_ref(struct xe_force_wake *fw, enum xe_force_wake_domains domain) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2bda426a6986..0f2e3af49912 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -69,9 +69,8 @@ /** * struct xe_ggtt_node - A node in GGTT. * - * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node - * insertion, reservation, or 'ballooning'. - * It will, then, be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_deballoon(). + * This struct is allocated with xe_ggtt_insert_node(,_transform) or xe_ggtt_insert_bo(,_at). + * It will be deallocated using xe_ggtt_node_remove(). */ struct xe_ggtt_node { /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ @@ -84,6 +83,61 @@ struct xe_ggtt_node { bool invalidate_on_remove; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * Which can vary from platform to platform. + */ +struct xe_ggtt_pt_ops { + /** @pte_encode_flags: Encode PTE flags for a given BO */ + u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); + + /** @ggtt_set_pte: Directly write into GGTT's PTE */ + xe_ggtt_set_pte_fn ggtt_set_pte; + + /** @ggtt_get_pte: Directly read from GGTT's PTE */ + u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); +}; + +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contains its own Global Graphics Translation Table + * (GGTT) instance. 
+ */ +struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ + struct xe_tile *tile; + /** @start: Start offset of GGTT */ + u64 start; + /** @size: Total usable size of this GGTT */ + u64 size; + +#define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + */ + unsigned int flags; + /** @scratch: Internal object allocation used as a scratch page */ + struct xe_bo *scratch; + /** @lock: Mutex lock to protect GGTT data */ + struct mutex lock; + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ + u64 __iomem *gsm; + /** @pt_ops: Page Table operations per platform */ + const struct xe_ggtt_pt_ops *pt_ops; + /** @mm: The memory manager used to manage individual GGTT allocations */ + struct drm_mm mm; + /** @access_count: counts GGTT writes */ + unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; +}; + static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { u64 pte = XE_PAGE_PRESENT; @@ -193,7 +247,7 @@ static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr) { xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); - xe_tile_assert(ggtt->tile, addr < ggtt->size); + xe_tile_assert(ggtt->tile, addr < ggtt->start + ggtt->size); return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]); } @@ -299,7 +353,7 @@ static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u64 start, u64 size) { ggtt->start = start; ggtt->size = size; - drm_mm_init(&ggtt->mm, start, size); + drm_mm_init(&ggtt->mm, 0, size); } int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 start, u32 size) @@ -347,9 +401,15 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt_start = wopcm; ggtt_size = (gsm_size / 8) * (u64)XE_PAGE_SIZE - ggtt_start; } else 
{ - /* GGTT is expected to be 4GiB */ - ggtt_start = wopcm; - ggtt_size = SZ_4G - ggtt_start; + ggtt_start = xe_tile_sriov_vf_ggtt_base(ggtt->tile); + ggtt_size = xe_tile_sriov_vf_ggtt(ggtt->tile); + + if (ggtt_start < wopcm || + ggtt_start + ggtt_size > GUC_GGTT_TOP) { + xe_tile_err(ggtt->tile, "Invalid GGTT configuration: %#llx-%#llx\n", + ggtt_start, ggtt_start + ggtt_size - 1); + return -ERANGE; + } } ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M; @@ -367,7 +427,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; - ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_MEM_RECLAIM, 0); + ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!ggtt->wq) return -ENOMEM; @@ -377,17 +437,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; - err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); - if (err) - return err; - - if (IS_SRIOV_VF(xe)) { - err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); - if (err) - return err; - } - - return 0; + return devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); } ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */ @@ -401,12 +451,17 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) /* Display may have allocated inside ggtt, so be careful with clearing here */ mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) - xe_ggtt_clear(ggtt, start, end - start); + xe_ggtt_clear(ggtt, ggtt->start + start, end - start); xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); } +static void ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + static void ggtt_node_remove(struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = node->ggtt; @@ -418,7 +473,7 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) mutex_lock(&ggtt->lock); if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); + xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node)); 
drm_mm_remove_node(&node->base); node->base.size = 0; mutex_unlock(&ggtt->lock); @@ -432,7 +487,7 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) drm_dev_exit(idx); free_node: - xe_ggtt_node_fini(node); + ggtt_node_fini(node); } static void ggtt_node_remove_work_func(struct work_struct *work) @@ -538,169 +593,38 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, - const struct drm_mm_node *node, const char *description) -{ - char buf[10]; - - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { - string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); - xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n", - node->start, node->start + node->size, buf, description); - } -} - /** - * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses - * @node: the &xe_ggtt_node to hold reserved GGTT node - * @start: the starting GGTT address of the reserved region - * @end: then end GGTT address of the reserved region - * - * To be used in cases where ggtt->lock is already taken. - * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node. - * - * Return: 0 on success or a negative error code on failure. 
- */ -int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end) -{ - struct xe_ggtt *ggtt = node->ggtt; - int err; - - xe_tile_assert(ggtt->tile, start < end); - xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - lockdep_assert_held(&ggtt->lock); - - node->base.color = 0; - node->base.start = start; - node->base.size = end - start; - - err = drm_mm_reserve_node(&ggtt->mm, &node->base); - - if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->base.start, node->base.start + node->base.size, ERR_PTR(err))) - return err; - - xe_ggtt_dump_node(ggtt, &node->base, "balloon"); - return 0; -} - -/** - * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region - * @node: the &xe_ggtt_node with reserved GGTT region - * - * To be used in cases where ggtt->lock is already taken. - * See xe_ggtt_node_insert_balloon_locked() for details. - */ -void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node) -{ - if (!xe_ggtt_node_allocated(node)) - return; - - lockdep_assert_held(&node->ggtt->lock); - - xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); - - drm_mm_remove_node(&node->base); -} - -static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size) -{ - struct xe_tile *tile = ggtt->tile; - - xe_tile_assert(tile, start >= ggtt->start); - xe_tile_assert(tile, start + size <= ggtt->start + ggtt->size); -} - -/** - * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range. + * xe_ggtt_shift_nodes() - Shift GGTT nodes to adjust for a change in usable address range. * @ggtt: the &xe_ggtt struct instance - * @shift: change to the location of area provisioned for current VF + * @new_start: new location of area provisioned for current VF * - * This function moves all nodes from the GGTT VM, to a temp list. 
These nodes are expected - * to represent allocations in range formerly assigned to current VF, before the range changed. - * When the GGTT VM is completely clear of any nodes, they are re-added with shifted offsets. + * Ensure that all struct &xe_ggtt_node are moved to the @new_start base address + * by changing the base offset of the GGTT. * - * The function has no ability of failing - because it shifts existing nodes, without - * any additional processing. If the nodes were successfully existing at the old address, - * they will do the same at the new one. A fail inside this function would indicate that - * the list of nodes was either already damaged, or that the shift brings the address range - * outside of valid bounds. Both cases justify an assert rather than error code. + * This function may be called multiple times during recovery, but if + * @new_start is unchanged from the current base, it's a noop. + * + * @new_start should be a value between xe_wopcm_size() and #GUC_GGTT_TOP. 
*/ -void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift) +void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_start) { - struct xe_tile *tile __maybe_unused = ggtt->tile; - struct drm_mm_node *node, *tmpn; - LIST_HEAD(temp_list_head); + guard(mutex)(&ggtt->lock); - lockdep_assert_held(&ggtt->lock); + xe_tile_assert(ggtt->tile, new_start >= xe_wopcm_size(tile_to_xe(ggtt->tile))); + xe_tile_assert(ggtt->tile, new_start + ggtt->size <= GUC_GGTT_TOP); - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) - drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) - xe_ggtt_assert_fit(ggtt, node->start + shift, node->size); - - drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) { - drm_mm_remove_node(node); - list_add(&node->node_list, &temp_list_head); - } - - list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) { - list_del(&node->node_list); - node->start += shift; - drm_mm_reserve_node(&ggtt->mm, node); - xe_tile_assert(tile, drm_mm_node_allocated(node)); - } + /* pairs with READ_ONCE in xe_ggtt_node_addr() */ + WRITE_ONCE(ggtt->start, new_start); } -static int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, +static int xe_ggtt_insert_node_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags) { return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } -/** - * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT - * @node: the &xe_ggtt_node to be inserted - * @size: size of the node - * @align: alignment constrain of the node - * - * It cannot be called without first having called xe_ggtt_init() once. - * - * Return: 0 on success or a negative error code on failure. 
- */ -int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) -{ - int ret; - - if (!node || !node->ggtt) - return -ENOENT; - - mutex_lock(&node->ggtt->lock); - ret = xe_ggtt_node_insert_locked(node, size, align, - DRM_MM_INSERT_HIGH); - mutex_unlock(&node->ggtt->lock); - - return ret; -} - -/** - * xe_ggtt_node_init - Initialize %xe_ggtt_node struct - * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. - * - * This function will allocate the struct %xe_ggtt_node and return its pointer. - * This struct will then be freed after the node removal upon xe_ggtt_node_remove() - * or xe_ggtt_node_remove_balloon_locked(). - * - * Having %xe_ggtt_node struct allocated doesn't mean that the node is already - * allocated in GGTT. Only xe_ggtt_node_insert(), allocation through - * xe_ggtt_node_insert_transform(), or xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved - * in GGTT. - * - * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. - **/ -struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +static struct xe_ggtt_node *ggtt_node_init(struct xe_ggtt *ggtt) { struct xe_ggtt_node *node = kzalloc_obj(*node, GFP_NOFS); @@ -714,30 +638,31 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) } /** - * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct - * @node: the &xe_ggtt_node to be freed + * xe_ggtt_insert_node - Insert a &xe_ggtt_node into the GGTT + * @ggtt: the &xe_ggtt into which the node should be inserted. 
+ * @size: size of the node + * @align: alignment constrain of the node * - * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then, - * this function needs to be called to free the %xe_ggtt_node struct - **/ -void xe_ggtt_node_fini(struct xe_ggtt_node *node) -{ - kfree(node); -} - -/** - * xe_ggtt_node_allocated - Check if node is allocated in GGTT - * @node: the &xe_ggtt_node to be inspected - * - * Return: True if allocated, False otherwise. + * Return: &xe_ggtt_node on success or a ERR_PTR on failure. */ -bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) +struct xe_ggtt_node *xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 align) { - if (!node || !node->ggtt) - return false; + struct xe_ggtt_node *node; + int ret; - return drm_mm_node_allocated(&node->base); + node = ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + guard(mutex)(&ggtt->lock); + ret = xe_ggtt_insert_node_locked(node, size, align, + DRM_MM_INSERT_HIGH); + if (ret) { + ggtt_node_fini(node); + return ERR_PTR(ret); + } + + return node; } /** @@ -770,7 +695,7 @@ static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, if (XE_WARN_ON(!node)) return; - start = node->base.start; + start = xe_ggtt_node_addr(node); end = start + xe_bo_size(bo); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { @@ -811,7 +736,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) } /** - * xe_ggtt_node_insert_transform - Insert a newly allocated &xe_ggtt_node into the GGTT + * xe_ggtt_insert_node_transform - Insert a newly allocated &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where the node will inserted/reserved. * @bo: The bo to be transformed * @pte_flags: The extra GGTT flags to add to mapping. 
@@ -825,7 +750,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) * * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. */ -struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, +struct xe_ggtt_node *xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 pte_flags, u64 size, u32 align, xe_ggtt_transform_cb transform, void *arg) @@ -833,7 +758,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, struct xe_ggtt_node *node; int ret; - node = xe_ggtt_node_init(ggtt); + node = ggtt_node_init(ggtt); if (IS_ERR(node)) return ERR_CAST(node); @@ -842,7 +767,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, goto err; } - ret = xe_ggtt_node_insert_locked(node, size, align, 0); + ret = xe_ggtt_insert_node_locked(node, size, align, 0); if (ret) goto err_unlock; @@ -857,7 +782,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, err_unlock: mutex_unlock(&ggtt->lock); err: - xe_ggtt_node_fini(node); + ggtt_node_fini(node); return ERR_PTR(ret); } @@ -883,7 +808,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); - bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt); + bo->ggtt_node[tile_id] = ggtt_node_init(ggtt); if (IS_ERR(bo->ggtt_node[tile_id])) { err = PTR_ERR(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; @@ -891,10 +816,30 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, } mutex_lock(&ggtt->lock); + /* + * When inheriting the initial framebuffer, the framebuffer is + * physically located at VRAM address 0, and usually at GGTT address 0 too. + * + * The display code will ask for a GGTT allocation between end of BO and + * remainder of GGTT, unaware that the start is reserved by WOPCM. 
+ */ + if (start >= ggtt->start) + start -= ggtt->start; + else + start = 0; + + /* Should never happen, but since we handle start, fail graciously for end */ + if (end >= ggtt->start) + end -= ggtt->start; + else + end = 0; + + xe_tile_assert(ggtt->tile, end >= start + xe_bo_size(bo)); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { - xe_ggtt_node_fini(bo->ggtt_node[tile_id]); + ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; } else { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; @@ -1002,18 +947,16 @@ static u64 xe_encode_vfid_pte(u16 vfid) return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT; } -static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +static void xe_ggtt_assign_locked(const struct xe_ggtt_node *node, u16 vfid) { - u64 start = node->start; - u64 size = node->size; + struct xe_ggtt *ggtt = node->ggtt; + u64 start = xe_ggtt_node_addr(node); + u64 size = xe_ggtt_node_size(node); u64 end = start + size - 1; u64 pte = xe_encode_vfid_pte(vfid); lockdep_assert_held(&ggtt->lock); - if (!drm_mm_node_allocated(node)) - return; - while (start < end) { ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte); start += XE_PAGE_SIZE; @@ -1033,9 +976,8 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node */ void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&node->ggtt->lock); - xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); - mutex_unlock(&node->ggtt->lock); + guard(mutex)(&node->ggtt->lock); + xe_ggtt_assign_locked(node, vfid); } /** @@ -1057,14 +999,14 @@ int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfi if (!node) return -ENOENT; - guard(mutex)(&node->ggtt->lock); + ggtt = node->ggtt; + guard(mutex)(&ggtt->lock); if (xe_ggtt_node_pt_size(node) != size) return -EINVAL; - ggtt = node->ggtt; 
- start = node->base.start; - end = start + node->base.size - 1; + start = xe_ggtt_node_addr(node); + end = start + xe_ggtt_node_size(node) - 1; while (start < end) { pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); @@ -1097,14 +1039,14 @@ int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u if (!node) return -ENOENT; - guard(mutex)(&node->ggtt->lock); + ggtt = node->ggtt; + guard(mutex)(&ggtt->lock); if (xe_ggtt_node_pt_size(node) != size) return -EINVAL; - ggtt = node->ggtt; - start = node->base.start; - end = start + node->base.size - 1; + start = xe_ggtt_node_addr(node); + end = start + xe_ggtt_node_size(node) - 1; while (start < end) { vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); @@ -1211,7 +1153,8 @@ u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) */ u64 xe_ggtt_node_addr(const struct xe_ggtt_node *node) { - return node->base.start; + /* pairs with WRITE_ONCE in xe_ggtt_shift_nodes() */ + return node->base.start + READ_ONCE(node->ggtt->start); } /** diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 70d5e07ac4b6..c864cc975a69 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -9,6 +9,7 @@ #include "xe_ggtt_types.h" struct drm_printer; +struct xe_bo; struct xe_tile; struct drm_exec; @@ -17,23 +18,18 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size); int xe_ggtt_init(struct xe_ggtt *ggtt); -struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); -void xe_ggtt_node_fini(struct xe_ggtt_node *node); -int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, - u64 start, u64 size); -void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node); -void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift); +void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_base); u64 xe_ggtt_start(struct xe_ggtt *ggtt); u64 xe_ggtt_size(struct xe_ggtt *ggtt); -int 
xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); struct xe_ggtt_node * -xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, +xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 align); +struct xe_ggtt_node * +xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 pte, u64 size, u32 align, xe_ggtt_transform_cb transform, void *arg); void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); -bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node); void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec); diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d82b71a198bc..cf754e4d502a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -6,72 +6,16 @@ #ifndef _XE_GGTT_TYPES_H_ #define _XE_GGTT_TYPES_H_ +#include #include -#include "xe_pt_types.h" - -struct xe_bo; +struct xe_ggtt; struct xe_ggtt_node; -struct xe_gt; - -/** - * struct xe_ggtt - Main GGTT struct - * - * In general, each tile can contains its own Global Graphics Translation Table - * (GGTT) instance. - */ -struct xe_ggtt { - /** @tile: Back pointer to tile where this GGTT belongs */ - struct xe_tile *tile; - /** @start: Start offset of GGTT */ - u64 start; - /** @size: Total usable size of this GGTT */ - u64 size; - -#define XE_GGTT_FLAGS_64K BIT(0) - /** - * @flags: Flags for this GGTT - * Acceptable flags: - * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. 
- */ - unsigned int flags; - /** @scratch: Internal object allocation used as a scratch page */ - struct xe_bo *scratch; - /** @lock: Mutex lock to protect GGTT data */ - struct mutex lock; - /** - * @gsm: The iomem pointer to the actual location of the translation - * table located in the GSM for easy PTE manipulation - */ - u64 __iomem *gsm; - /** @pt_ops: Page Table operations per platform */ - const struct xe_ggtt_pt_ops *pt_ops; - /** @mm: The memory manager used to manage individual GGTT allocations */ - struct drm_mm mm; - /** @access_count: counts GGTT writes */ - unsigned int access_count; - /** @wq: Dedicated unordered work queue to process node removals */ - struct workqueue_struct *wq; -}; typedef void (*xe_ggtt_set_pte_fn)(struct xe_ggtt *ggtt, u64 addr, u64 pte); typedef void (*xe_ggtt_transform_cb)(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u64 pte_flags, xe_ggtt_set_pte_fn set_pte, void *arg); -/** - * struct xe_ggtt_pt_ops - GGTT Page table operations - * Which can vary from platform to platform. 
- */ -struct xe_ggtt_pt_ops { - /** @pte_encode_flags: Encode PTE flags for a given BO */ - u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); - - /** @ggtt_set_pte: Directly write into GGTT's PTE */ - xe_ggtt_set_pte_fn ggtt_set_pte; - - /** @ggtt_get_pte: Directly read from GGTT's PTE */ - u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); -}; #endif diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 42438b21f235..707db650a2ae 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -435,15 +435,11 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) return 0; } -static void xe_gsc_proxy_remove(void *arg) +static void xe_gsc_proxy_stop(struct xe_gsc *gsc) { - struct xe_gsc *gsc = arg; struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - if (!gsc->proxy.component_added) - return; - /* disable HECI2 IRQs */ scoped_guard(xe_pm_runtime, xe) { CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); @@ -455,6 +451,30 @@ static void xe_gsc_proxy_remove(void *arg) } xe_gsc_wait_for_worker_completion(gsc); + gsc->proxy.started = false; +} + +static void xe_gsc_proxy_remove(void *arg) +{ + struct xe_gsc *gsc = arg; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + + if (!gsc->proxy.component_added) + return; + + /* + * GSC proxy start is an async process that can be ongoing during + * Xe module load/unload. Using devm managed action to register + * xe_gsc_proxy_stop could cause issues if Xe module unload has + * already started when the action is registered, potentially leading + * to the cleanup being called at the wrong time. Therefore, instead + * of registering a separate devm action to undo what is done in + * proxy start, we call it from here, but only if the start has + * completed successfully (tracked with the 'started' flag). 
+ */ + if (gsc->proxy.started) + xe_gsc_proxy_stop(gsc); component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); gsc->proxy.component_added = false; @@ -510,6 +530,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) */ int xe_gsc_proxy_start(struct xe_gsc *gsc) { + struct xe_gt *gt = gsc_to_gt(gsc); int err; /* enable the proxy interrupt in the GSC shim layer */ @@ -521,12 +542,18 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc) */ err = xe_gsc_proxy_request_handler(gsc); if (err) - return err; + goto err_irq_disable; if (!xe_gsc_proxy_init_done(gsc)) { - xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); - return -EIO; + xe_gt_err(gt, "GSC FW reports proxy init not completed\n"); + err = -EIO; + goto err_irq_disable; } + gsc->proxy.started = true; return 0; + +err_irq_disable: + gsc_proxy_irq_toggle(gsc, false); + return err; } diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 97c056656df0..5aaa2a75861f 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -58,6 +58,8 @@ struct xe_gsc { struct mutex mutex; /** @proxy.component_added: whether the component has been added */ bool component_added; + /** @proxy.started: whether the proxy has been started */ + bool started; /** @proxy.bo: object to store message to and from the GSC */ struct xe_bo *bo; /** @proxy.to_gsc: map of the memory used to send messages to the GSC */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index df6d04704823..b455af1e6072 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -33,6 +33,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" +#include "xe_gt_stats.h" #include "xe_gt_sysfs.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" @@ -141,15 +142,14 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - 
unsigned int fw_ref; u32 reg; if (IS_SRIOV_VF(xe)) return; if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); @@ -163,8 +163,6 @@ static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt) reg |= EN_CMP_1WCOH_GW; xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg); } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } } @@ -500,6 +498,10 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_stats_init(gt); + if (err) + return err; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); if (!fw_ref.domains) return -ETIMEDOUT; @@ -894,7 +896,6 @@ static void gt_reset_worker(struct work_struct *w) if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_stop_prepare(gt); - xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_pagefault_reset(gt_to_xe(gt), gt); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index fe944687728c..b35be36b0eaa 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -13,6 +13,7 @@ #include "xe_gt_sysfs.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) { @@ -88,6 +89,11 @@ void xe_gt_apply_ccs_mode(struct xe_gt *gt) __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); } +static bool gt_ccs_mode_default(struct xe_gt *gt) +{ + return gt->ccs_mode == 1; +} + static ssize_t num_cslices_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -117,12 +123,6 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, u32 num_engines, num_slices; int ret; - if (IS_SRIOV(xe)) { - xe_gt_dbg(gt, "Can't change compute mode when running as %s\n", - xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); - return -EOPNOTSUPP; - } - ret = kstrtou32(buff, 
0, &num_engines); if (ret) return ret; @@ -139,21 +139,35 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, } /* CCS mode can only be updated when there are no drm clients */ - mutex_lock(&xe->drm.filelist_mutex); + guard(mutex)(&xe->drm.filelist_mutex); if (!list_empty(&xe->drm.filelist)) { - mutex_unlock(&xe->drm.filelist_mutex); xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n"); return -EBUSY; } - if (gt->ccs_mode != num_engines) { - xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); - gt->ccs_mode = num_engines; - xe_gt_record_user_engines(gt); - xe_gt_reset(gt); + if (gt->ccs_mode == num_engines) + return count; + + /* + * Changing default CCS mode is only allowed when there + * are no VFs. Try to lockdown PF to find out. + */ + if (gt_ccs_mode_default(gt) && IS_SRIOV_PF(xe)) { + ret = xe_sriov_pf_lockdown(xe); + if (ret) { + xe_gt_dbg(gt, "Can't change CCS Mode: VFs are enabled\n"); + return ret; + } } - mutex_unlock(&xe->drm.filelist_mutex); + xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); + gt->ccs_mode = num_engines; + xe_gt_record_user_engines(gt); + xe_gt_reset(gt); + + /* We may end PF lockdown once CCS mode is default again */ + if (gt_ccs_mode_default(gt) && IS_SRIOV_PF(xe)) + xe_sriov_pf_end_lockdown(xe); return count; } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 4363bc9c3606..f45306308cd6 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -155,6 +155,30 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) return 0; } +/* + * Check the registers referenced on a save-restore list and report any + * save-restore entries that did not get applied. 
+ */ +static int register_save_restore_check(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + drm_printf(p, "ERROR: Could not acquire forcewake\n"); + return -ETIMEDOUT; + } + + xe_reg_sr_readback_check(>->reg_sr, gt, p); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_readback_check(&hwe->reg_sr, gt, p); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_lrc_check(&hwe->reg_lrc, gt, hwe, p); + + return 0; +} + static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); @@ -209,6 +233,8 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc }, { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig }, { "pat_sw_config", .show = xe_gt_debugfs_simple_show, .data = xe_pat_dump_sw_config }, + { "register-save-restore-check", + .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore_check }, }; /* everything else should be added here */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 7a569e1730a4..4a2d9edb6a4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -168,6 +168,24 @@ void xe_gt_idle_disable_pg(struct xe_gt *gt) xe_mmio_write32(>->mmio, POWERGATE_ENABLE, gtidle->powergate_enable); } +static void force_wake_domains_show(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_force_wake_domain *domain; + struct xe_force_wake *fw = gt_to_fw(gt); + unsigned int tmp; + unsigned long flags; + + spin_lock_irqsave(&fw->lock, flags); + for_each_fw_domain(domain, fw, tmp) { + drm_printf(p, "%s.ref_count=%u, %s.fwake=0x%x\n", + xe_force_wake_domain_to_str(domain->id), + READ_ONCE(domain->ref), + xe_force_wake_domain_to_str(domain->id), + 
xe_mmio_read32(>->mmio, domain->reg_ctl)); + } + spin_unlock_irqrestore(&fw->lock, flags); +} + /** * xe_gt_idle_pg_print - Xe powergating info * @gt: GT object @@ -254,6 +272,13 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) { + drm_printf(p, "GSC Power Gate Status: %s\n", + str_up_down(pg_status & GSC_AWAKE_STATUS)); + } + + force_wake_domains_show(gt, p); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7c1fe9ac120d..7c6f039c880d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -201,7 +201,7 @@ static const struct xe_mmio_range xe2lpg_dss_steering_table[] = { { 0x009680, 0x0096FF }, /* DSS */ { 0x00D800, 0x00D87F }, /* SLICE */ { 0x00DC00, 0x00DCFF }, /* SLICE */ - { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + { 0x00DE00, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ { 0x00E980, 0x00E9FF }, /* SLICE */ { 0x013000, 0x0133FF }, /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */ {}, @@ -280,6 +280,19 @@ static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { {}, }; +static const struct xe_mmio_range xe3p_lpg_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* GAM, rsvd, GAMWKR */ + { 0x008700, 0x00887F }, /* NODE */ + { 0x00B000, 0x00B3FF }, /* NODE, L3BANK */ + { 0x00B500, 0x00B6FF }, /* PSMI */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DD7F }, /* MEMPIPE */ + { 0x00F000, 0x00FFFF }, /* GAM, GAMWKR */ + { 0x013400, 0x0135FF }, /* MEMPIPE */ + {}, +}; + static void init_steering_l3bank(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -505,9 +518,6 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) spin_lock_init(>->mcr_lock); - if (IS_SRIOV_VF(xe)) - return; - if (gt->info.type == 
XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); @@ -522,17 +532,14 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) } } else { if (GRAPHICS_VERx100(xe) == 3511) { - /* - * TODO: there are some ranges in bspec with missing - * termination: [0x00B000, 0x00B0FF] and - * [0x00D880, 0x00D8FF] (NODE); [0x00B100, 0x00B3FF] - * (L3BANK). Update them here once bspec is updated. - */ gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; gt->steering[NODE].ranges = xe3p_xpc_node_steering_table; + } else if (GRAPHICS_VERx100(xe) >= 3510) { + gt->steering[DSS].ranges = xe2lpg_dss_steering_table; + gt->steering[INSTANCE0].ranges = xe3p_lpg_instance0_steering_table; } else if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; @@ -568,9 +575,6 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) */ void xe_gt_mcr_init(struct xe_gt *gt) { - if (IS_SRIOV_VF(gt_to_xe(gt))) - return; - /* Select non-terminated steering target for each type */ for (int i = 0; i < NUM_STEERING_TYPES; i++) { gt->steering[i].initialized = true; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 23601ce79348..b867203b4997 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -279,7 +279,7 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, { struct xe_ggtt_node *node = config->ggtt_region; - if (!xe_ggtt_node_allocated(node)) + if (!node) return 0; return encode_ggtt(cfg, xe_ggtt_node_addr(node), xe_ggtt_node_size(node), details); @@ -482,23 +482,9 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void 
pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) -{ - if (xe_ggtt_node_allocated(node)) { - /* - * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() - * is redundant, as PTE will be implicitly re-assigned to PF by - * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). - */ - xe_ggtt_node_remove(node, false); - } else { - xe_ggtt_node_fini(node); - } -} - static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + xe_ggtt_node_remove(config->ggtt_region, false); config->ggtt_region = NULL; } @@ -517,7 +503,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (xe_ggtt_node_allocated(config->ggtt_region)) { + if (config->ggtt_region) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; @@ -528,19 +514,15 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (unlikely(err)) return err; } - xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); + xe_gt_assert(gt, !config->ggtt_region); if (!size) return 0; - node = xe_ggtt_node_init(ggtt); + node = xe_ggtt_insert_node(ggtt, size, alignment); if (IS_ERR(node)) return PTR_ERR(node); - err = xe_ggtt_node_insert(node, size, alignment); - if (unlikely(err)) - goto err; - xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", vfid, xe_ggtt_node_addr(node), xe_ggtt_node_addr(node) + size - 1); @@ -552,7 +534,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) config->ggtt_region = node; return 0; err: - pf_release_ggtt(tile, node); + xe_ggtt_node_remove(node, false); return err; } @@ -562,7 +544,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_ggtt_node *node = config->ggtt_region; xe_gt_assert(gt, xe_gt_is_main_type(gt)); - return xe_ggtt_node_allocated(node) ? 
xe_ggtt_node_size(node) : 0; + return node ? xe_ggtt_node_size(node) : 0; } /** @@ -1469,8 +1451,8 @@ int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_lmem_alignment(struct xe_gt *gt) { - /* this might be platform dependent */ - return SZ_2M; + return xe_device_has_lmtt(gt_to_xe(gt)) ? + xe_lmtt_page_size(>_to_tile(gt)->sriov.pf.lmtt) : XE_PAGE_SIZE; } static u64 pf_get_min_spare_lmem(struct xe_gt *gt) @@ -1645,13 +1627,15 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) struct xe_device *xe = gt_to_xe(gt); struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; + u64 alignment; int err; xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); xe_gt_assert(gt, xe_gt_is_main_type(gt)); - size = round_up(size, pf_get_lmem_alignment(gt)); + alignment = pf_get_lmem_alignment(gt); + size = round_up(size, alignment); if (config->lmem_obj) { err = pf_distribute_config_lmem(gt, vfid, 0); @@ -1667,12 +1651,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); + xe_gt_assert(gt, alignment == XE_PAGE_SIZE || alignment == SZ_2M); bo = xe_bo_create_pin_range_novm(xe, tile, ALIGN(size, PAGE_SIZE), 0, ~0ull, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_NEEDS_2M | + XE_BO_FLAG_VRAM(tile->mem.vram) | + (alignment == SZ_2M ? XE_BO_FLAG_NEEDS_2M : 0) | XE_BO_FLAG_PINNED | XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM); @@ -1754,7 +1738,44 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size } /** - * xe_gt_sriov_pf_config_bulk_set_lmem - Provision many VFs with LMEM. + * xe_gt_sriov_pf_config_bulk_set_lmem_locked() - Provision many VFs with LMEM. 
+ * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @size: requested LMEM size + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size) +{ + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); + xe_gt_assert(gt, vfid); + + if (!num_vfs) + return 0; + + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_lmem(gt, n, size); + if (err) + break; + } + + return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, + pf_get_vf_config_lmem, + "LMEM", n, err); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_lmem() - Provision many VFs with LMEM. * @gt: the &xe_gt (can't be media) * @vfid: starting VF identifier (can't be 0) * @num_vfs: number of VFs to provision @@ -1767,26 +1788,52 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, u64 size) { - unsigned int n; - int err = 0; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + return xe_gt_sriov_pf_config_bulk_set_lmem_locked(gt, vfid, num_vfs, size); +} + +/** + * xe_gt_sriov_pf_config_get_lmem_locked() - Get VF's LMEM quota. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0 == PFID) + * + * This function can only be called on PF. + * + * Return: VF's LMEM quota. 
+ */ +u64 xe_gt_sriov_pf_config_get_lmem_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid); + + return pf_get_vf_config_lmem(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_set_lmem_locked() - Provision VF with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: the VF identifier (can't be 0 == PFID) + * @size: requested LMEM size + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_config_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + int err; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, xe_gt_is_main_type(gt)); + xe_gt_assert(gt, vfid); - if (!num_vfs) - return 0; + err = pf_provision_vf_lmem(gt, vfid, size); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - for (n = vfid; n < vfid + num_vfs; n++) { - err = pf_provision_vf_lmem(gt, n, size); - if (err) - break; - } - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); - - return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, - xe_gt_sriov_pf_config_get_lmem, - "LMEM", n, err); + return pf_config_set_u64_done(gt, vfid, size, + pf_get_vf_config_lmem(gt, vfid), + "LMEM", err); } static struct xe_bo *pf_get_vf_config_lmem_obj(struct xe_gt *gt, unsigned int vfid) @@ -1856,6 +1903,81 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) return fair; } +static u64 pf_profile_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) +{ + struct xe_tile *tile = gt_to_tile(gt); + bool admin_only_pf = xe_sriov_pf_admin_only(tile->xe); + u64 usable = xe_vram_region_usable_size(tile->mem.vram); + u64 spare = pf_get_min_spare_lmem(gt); + u64 available = usable > spare ? 
usable - spare : 0; + u64 shareable = ALIGN_DOWN(available, SZ_1G); + u64 alignment = pf_get_lmem_alignment(gt); + u64 fair; + + if (admin_only_pf) + fair = div_u64(shareable, num_vfs); + else + fair = div_u64(shareable, 1 + num_vfs); + + if (!admin_only_pf && fair) + fair = rounddown_pow_of_two(fair); + + return ALIGN_DOWN(fair, alignment); +} + +static void __pf_show_provisioning_lmem(struct xe_gt *gt, unsigned int first_vf, + unsigned int num_vfs, bool provisioned) +{ + unsigned int allvfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt); /* PF plus VFs */ + unsigned long *bitmap __free(bitmap) = bitmap_zalloc(allvfs, GFP_KERNEL); + unsigned int weight; + unsigned int n; + + if (!bitmap) + return; + + for (n = first_vf; n < first_vf + num_vfs; n++) { + if (!!pf_get_vf_config_lmem(gt, VFID(n)) == provisioned) + bitmap_set(bitmap, n, 1); + } + + weight = bitmap_weight(bitmap, allvfs); + if (!weight) + return; + + xe_gt_sriov_info(gt, "VF%s%*pbl %s provisioned with VRAM\n", + weight > 1 ? "s " : "", allvfs, bitmap, + provisioned ? "already" : "not"); +} + +static void pf_show_all_provisioned_lmem(struct xe_gt *gt) +{ + __pf_show_provisioning_lmem(gt, VFID(1), xe_gt_sriov_pf_get_totalvfs(gt), true); +} + +static void pf_show_unprovisioned_lmem(struct xe_gt *gt, unsigned int first_vf, + unsigned int num_vfs) +{ + __pf_show_provisioning_lmem(gt, first_vf, num_vfs, false); +} + +static bool pf_needs_provision_lmem(struct xe_gt *gt, unsigned int first_vf, + unsigned int num_vfs) +{ + unsigned int vfid; + + for (vfid = first_vf; vfid < first_vf + num_vfs; vfid++) { + if (pf_get_vf_config_lmem(gt, vfid)) { + pf_show_all_provisioned_lmem(gt); + pf_show_unprovisioned_lmem(gt, first_vf, num_vfs); + return false; + } + } + + pf_show_all_provisioned_lmem(gt); + return true; +} + /** * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM. 
* @gt: the &xe_gt (can't be media) @@ -1869,6 +1991,7 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u64 profile; u64 fair; xe_gt_assert(gt, vfid); @@ -1878,14 +2001,22 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - fair = pf_estimate_fair_lmem(gt, num_vfs); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + if (!pf_needs_provision_lmem(gt, vfid, num_vfs)) + return 0; + + fair = pf_estimate_fair_lmem(gt, num_vfs); if (!fair) return -ENOSPC; - return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair); + profile = pf_profile_fair_lmem(gt, num_vfs); + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n", + "VRAM", fair, profile); + + return xe_gt_sriov_pf_config_bulk_set_lmem_locked(gt, vfid, num_vfs, fair); } /** @@ -2576,7 +2707,7 @@ int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool forc static void pf_sanitize_ggtt(struct xe_ggtt_node *ggtt_region, unsigned int vfid) { - if (xe_ggtt_node_allocated(ggtt_region)) + if (ggtt_region) xe_ggtt_assign(ggtt_region, vfid); } @@ -3035,7 +3166,7 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!xe_ggtt_node_allocated(config->ggtt_region)) + if (!config->ggtt_region) continue; string_get_size(xe_ggtt_node_size(config->ggtt_region), 1, STRING_UNITS_2, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 3c6c8b6655af..4a004ecd6140 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -36,6 +36,10 @@ int 
xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, u64 size); +u64 xe_gt_sriov_pf_config_get_lmem_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, u64 size); +int xe_gt_sriov_pf_config_bulk_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size); struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid); u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index bf48b05797de..5cb705c7ee7a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -1259,7 +1259,7 @@ int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int v } /** - * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence. + * xe_gt_sriov_pf_control_trigger_restore_vf() - Start an SR-IOV VF migration data restore sequence. 
* @gt: the &xe_gt * @vfid: the VF identifier * diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index b5e0a5b7723e..6586df2fcb91 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -111,6 +111,8 @@ static const struct xe_reg ver_35_runtime_regs[] = { XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + XE3P_XPC_GT_GEOMETRY_DSS_3, /* _MMIO(0x915c) */ + XE3P_XPC_GT_COMPUTE_DSS_3, /* _MMIO(0x9160) */ SERVICE_COPY_ENABLE, /* _MMIO(0x9170) */ }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 30e8c2cf5f09..8989c8e1be95 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -488,16 +488,12 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt) static int vf_get_ggtt_info(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); - struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_guc *guc = >->uc.guc; u64 start, size, ggtt_size; - s64 shift; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - guard(mutex)(&ggtt->lock); - err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); if (unlikely(err)) return err; @@ -509,8 +505,21 @@ static int vf_get_ggtt_info(struct xe_gt *gt) if (!size) return -ENODATA; + xe_tile_sriov_vf_ggtt_base_store(tile, start); ggtt_size = xe_tile_sriov_vf_ggtt(tile); - if (ggtt_size && ggtt_size != size) { + if (!ggtt_size) { + /* + * This function is called once during xe_guc_init_noalloc(), + * at which point ggtt_size = 0 and we have to initialize everything, + * and GGTT is not yet initialized. + * + * Return early as there's nothing to fixup. 
+ */ + xe_tile_sriov_vf_ggtt_store(tile, size); + return 0; + } + + if (ggtt_size != size) { xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", size / SZ_1K, ggtt_size / SZ_1K); return -EREMCHG; @@ -519,21 +528,13 @@ static int vf_get_ggtt_info(struct xe_gt *gt) xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", start, start + size - 1, size / SZ_1K); - shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile); - xe_tile_sriov_vf_ggtt_base_store(tile, start); - xe_tile_sriov_vf_ggtt_store(tile, size); - - if (shift && shift != start) { - xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n", - shift, start); - xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift); - } - - if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) { - WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); - smp_wmb(); /* Ensure above write visible before wake */ - wake_up_all(>->sriov.vf.migration.wq); - } + /* + * This function can be called repeatedly from post migration fixups, + * at which point we inform the GGTT of the new base address. + * xe_ggtt_shift_nodes() may be called multiple times for each migration, + * but will be a noop if the base is unchanged. 
+ */ + xe_ggtt_shift_nodes(tile->mem.ggtt, start); return 0; } @@ -839,6 +840,13 @@ static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) xe_default_lrc_update_memirq_regs_with_address(hwe); } +static void vf_post_migration_mark_fixups_done(struct xe_gt *gt) +{ + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); + smp_wmb(); /* Ensure above write visible before wake */ + wake_up_all(>->sriov.vf.migration.wq); +} + static void vf_start_migration_recovery(struct xe_gt *gt) { bool started; @@ -1269,6 +1277,8 @@ static int vf_post_migration_fixups(struct xe_gt *gt) if (err) return err; + atomic_inc(>->sriov.vf.migration.fixups_complete_count); + return 0; } @@ -1373,6 +1383,7 @@ static void vf_post_migration_recovery(struct xe_gt *gt) if (err) goto fail; + vf_post_migration_mark_fixups_done(gt); vf_post_migration_rearm(gt); err = vf_post_migration_resfix_done(gt, marker); @@ -1507,19 +1518,49 @@ static bool vf_valid_ggtt(struct xe_gt *gt) } /** - * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses - * @gt: the &xe_gt + * xe_vf_migration_fixups_complete_count() - Get count of VF fixups completions. + * @gt: the &xe_gt instance which contains affected Global GTT + * + * Return: number of times VF fixups were completed since driver + * probe, or 0 if migration is not available, or -1 if fixups are + * pending or being applied right now. 
*/ -void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) +int xe_vf_migration_fixups_complete_count(struct xe_gt *gt) +{ + if (!IS_SRIOV_VF(gt_to_xe(gt)) || + !xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return 0; + + /* should never match fixups_complete_count value */ + if (!vf_valid_ggtt(gt)) + return -1; + + return atomic_read(>->sriov.vf.migration.fixups_complete_count); +} + +/** + * xe_gt_sriov_vf_wait_valid_ggtt() - wait for valid GGTT nodes and address refs + * @gt: the &xe_gt instance which contains affected Global GTT + * + * Return: number of times VF fixups were completed since driver + * probe, or 0 if migration is not available. + */ +int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) { int ret; + /* + * this condition needs to be identical to one in + * xe_vf_migration_fixups_complete_count() + */ if (!IS_SRIOV_VF(gt_to_xe(gt)) || !xe_sriov_vf_migration_supported(gt_to_xe(gt))) - return; + return 0; ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq, vf_valid_ggtt(gt), HZ * 5); xe_gt_WARN_ON(gt, !ret); + + return atomic_read(>->sriov.vf.migration.fixups_complete_count); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 7d97189c2d3d..a6f7127521a5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -39,6 +39,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); +int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); +int xe_vf_migration_fixups_complete_count(struct xe_gt *gt); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 4ef881b9b662..80562ffadb16 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h 
@@ -54,6 +54,8 @@ struct xe_gt_sriov_vf_migration { wait_queue_head_t wq; /** @scratch: Scratch memory for VF recovery */ void *scratch; + /** @fixups_complete_count: Counts completed fixups stages */ + atomic_t fixups_complete_count; /** @debug: Debug hooks for delaying migration */ struct { /** @@ -73,7 +75,7 @@ struct xe_gt_sriov_vf_migration { bool recovery_queued; /** @recovery_inprogress: VF post migration recovery in progress */ bool recovery_inprogress; - /** @ggtt_need_fixes: VF GGTT needs fixes */ + /** @ggtt_need_fixes: VF GGTT and references to it need fixes */ bool ggtt_need_fixes; }; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 37506434d7a3..81cec441b449 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -3,12 +3,37 @@ * Copyright © 2024 Intel Corporation */ -#include - +#include #include +#include "xe_device.h" #include "xe_gt_stats.h" -#include "xe_gt_types.h" + +static void xe_gt_stats_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + free_percpu(gt->stats); +} + +/** + * xe_gt_stats_init() - Initialize GT statistics + * @gt: GT structure + * + * Allocate per-CPU GT statistics. Using per-CPU stats allows increments + * to occur without cross-CPU atomics. + * + * Return: 0 on success, -ENOMEM on failure. 
+ */ +int xe_gt_stats_init(struct xe_gt *gt) +{ + gt->stats = alloc_percpu(struct xe_gt_stats); + if (!gt->stats) + return -ENOMEM; + + return drmm_add_action_or_reset(>_to_xe(gt)->drm, xe_gt_stats_fini, + gt); +} /** * xe_gt_stats_incr - Increments the specified stats counter @@ -23,7 +48,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) if (id >= __XE_GT_STATS_NUM_IDS) return; - atomic64_add(incr, >->stats.counters[id]); + this_cpu_add(gt->stats->counters[id], incr); } #define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name @@ -35,6 +60,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"), DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"), DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"), + DEF_STAT_STR(INVALID_PREFETCH_PAGEFAULT_COUNT, "invalid_prefetch_pagefault_count"), DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"), DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"), DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"), @@ -94,23 +120,37 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) { enum xe_gt_stats_id id; - for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) - drm_printf(p, "%s: %lld\n", stat_description[id], - atomic64_read(>->stats.counters[id])); + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) { + u64 total = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu); + + total += s->counters[id]; + } + + drm_printf(p, "%s: %lld\n", stat_description[id], total); + } return 0; } /** - * xe_gt_stats_clear - Clear the GT stats + * xe_gt_stats_clear() - Clear the GT stats * @gt: GT structure * - * This clear (zeros) all the available GT stats. + * Clear (zero) all available GT stats. Note that if the stats are being + * updated while this function is running, the results may be unpredictable. + * Intended to be called on an idle GPU. 
*/ void xe_gt_stats_clear(struct xe_gt *gt) { - int id; + int cpu; - for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) - atomic64_set(>->stats.counters[id], 0); + for_each_possible_cpu(cpu) { + struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu); + + memset(s, 0, sizeof(*s)); + } } diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 59a7bf60e242..3d0defab9b30 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -14,10 +14,16 @@ struct xe_gt; struct drm_printer; #ifdef CONFIG_DEBUG_FS +int xe_gt_stats_init(struct xe_gt *gt); int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); void xe_gt_stats_clear(struct xe_gt *gt); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); #else +static inline int xe_gt_stats_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index b8accdbc54eb..b6081c312474 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_STATS_TYPES_H_ #define _XE_GT_STATS_TYPES_H_ +#include + enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, @@ -13,6 +15,7 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_TLB_INVAL_US, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + XE_GT_STATS_ID_INVALID_PREFETCH_PAGEFAULT_COUNT, XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, @@ -58,4 +61,21 @@ enum xe_gt_stats_id { __XE_GT_STATS_NUM_IDS, }; +/** + * struct xe_gt_stats - Per-CPU GT statistics counters + * @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id + * + * This structure is used for high-frequency, per-CPU statistics collection + * in the Xe driver. 
By using a per-CPU allocation and ensuring the structure + * is cache-line aligned, we avoid the performance-heavy atomics and cache + * coherency traffic. + * + * Updates to these counters should be performed using the this_cpu_add() + * macro to ensure they are atomic with respect to local interrupts and + * preemption-safe without the overhead of explicit locking. + */ +struct xe_gt_stats { + u64 counters[__XE_GT_STATS_NUM_IDS]; +} ____cacheline_aligned; + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index bd5260221d8d..bfe87e682879 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -205,24 +205,6 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) } } -static void -get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) -{ - if (GRAPHICS_VER(xe) > 20) { - *geometry_regs = 3; - *compute_regs = 3; - } else if (GRAPHICS_VERx100(xe) == 1260) { - *geometry_regs = 0; - *compute_regs = 2; - } else if (GRAPHICS_VERx100(xe) >= 1250) { - *geometry_regs = 1; - *compute_regs = 1; - } else { - *geometry_regs = 1; - *compute_regs = 0; - } -} - void xe_gt_topology_init(struct xe_gt *gt) { @@ -230,29 +212,27 @@ xe_gt_topology_init(struct xe_gt *gt) XELP_GT_GEOMETRY_DSS_ENABLE, XE2_GT_GEOMETRY_DSS_1, XE2_GT_GEOMETRY_DSS_2, + XE3P_XPC_GT_GEOMETRY_DSS_3, }; static const struct xe_reg compute_regs[] = { XEHP_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2, + XE3P_XPC_GT_COMPUTE_DSS_3, }; - int num_geometry_regs, num_compute_regs; - struct xe_device *xe = gt_to_xe(gt); struct drm_printer p; - get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); - /* * Register counts returned shouldn't exceed the number of registers * passed as parameters below. 
*/ - xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); - xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); + xe_gt_assert(gt, gt->info.num_geometry_xecore_fuse_regs <= ARRAY_SIZE(geometry_regs)); + xe_gt_assert(gt, gt->info.num_compute_xecore_fuse_regs <= ARRAY_SIZE(compute_regs)); load_dss_mask(gt, gt->fuse_topo.g_dss_mask, - num_geometry_regs, geometry_regs); + gt->info.num_geometry_xecore_fuse_regs, geometry_regs); load_dss_mask(gt, gt->fuse_topo.c_dss_mask, - num_compute_regs, compute_regs); + gt->info.num_compute_xecore_fuse_regs, compute_regs); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); @@ -330,15 +310,14 @@ xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask) */ bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) { - struct xe_device *xe = gt_to_xe(gt); xe_dss_mask_t all_dss; - int g_dss_regs, c_dss_regs, dss_per_quad, quad_first; + int dss_per_quad, quad_first; bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS); - get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs); - dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4; + dss_per_quad = 32 * max(gt->info.num_geometry_xecore_fuse_regs, + gt->info.num_compute_xecore_fuse_regs) / 4; quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 5318d92fd473..8b55cf25a75f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -35,7 +35,7 @@ enum xe_gt_eu_type { XE_GT_EU_TYPE_SIMD16, }; -#define XE_MAX_DSS_FUSE_REGS 3 +#define XE_MAX_DSS_FUSE_REGS 4 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_REGS 1 #define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) @@ -45,11 +45,6 @@ typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)]; typedef unsigned long 
xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)]; typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)]; -struct xe_mmio_range { - u32 start; - u32 end; -}; - /* * The hardware has multiple kinds of multicast register ranges that need * special register steering (and future platforms are expected to add @@ -149,14 +144,21 @@ struct xe_gt { u8 id; /** @info.has_indirect_ring_state: GT has indirect ring state support */ u8 has_indirect_ring_state:1; + /** + * @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse + * registers the geometry XeCore mask spans. + */ + u8 num_geometry_xecore_fuse_regs; + /** + * @info.num_compute_xecore_fuse_regs: Number of 32b-bit fuse + * registers the compute XeCore mask spans. + */ + u8 num_compute_xecore_fuse_regs; } info; #if IS_ENABLED(CONFIG_DEBUG_FS) /** @stats: GT stats */ - struct { - /** @stats.counters: counters for various GT stats */ - atomic64_t counters[__XE_GT_STATS_NUM_IDS]; - } stats; + struct xe_gt_stats __percpu *stats; #endif /** diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 6df7c3f260e5..54d2fc780127 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -35,11 +35,13 @@ #include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" +#include "xe_guc_rc.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_sleep.h" #include "xe_sriov.h" #include "xe_sriov_pf_migration.h" #include "xe_uc.h" @@ -211,9 +213,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - if (XE_GT_WA(gt, 1509372804)) - flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_GT_WA(gt, 14018913170)) flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; @@ -668,6 +667,13 @@ static void guc_fini_hw(void *arg) guc_g2g_fini(guc); } +static void vf_guc_fini_hw(void *arg) +{ + struct 
xe_guc *guc = arg; + + xe_gt_sriov_vf_reset(guc_to_gt(guc)); +} + /** * xe_guc_comm_init_early - early initialization of GuC communication * @guc: the &xe_guc to initialize @@ -772,6 +778,10 @@ int xe_guc_init(struct xe_guc *guc) xe->info.has_page_reclaim_hw_assist = false; if (IS_SRIOV_VF(xe)) { + ret = devm_add_action_or_reset(xe->drm.dev, vf_guc_fini_hw, guc); + if (ret) + goto out; + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; @@ -869,6 +879,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_rc_init(guc); + if (ret) + return ret; + ret = xe_guc_engine_activity_init(guc); if (ret) return ret; @@ -900,6 +914,41 @@ int xe_guc_post_load_init(struct xe_guc *guc) return xe_guc_submit_enable(guc); } +/* + * Wa_14025883347: Prevent GuC firmware DMA failures during GuC-only reset by ensuring + * SRAM save/restore operations are complete before reset. + */ +static void guc_prevent_fw_dma_failure_on_reset(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 boot_hash_chk, guc_status, sram_status; + int ret; + + guc_status = xe_mmio_read32(>->mmio, GUC_STATUS); + if (guc_status & GS_MIA_IN_RESET) + return; + + boot_hash_chk = xe_mmio_read32(>->mmio, BOOT_HASH_CHK); + if (!(boot_hash_chk & GUC_BOOT_UKERNEL_VALID)) + return; + + /* Disable idle flow during reset (GuC reset re-enables it automatically) */ + xe_mmio_rmw32(>->mmio, GUC_MAX_IDLE_COUNT, 0, GUC_IDLE_FLOW_DISABLE); + + ret = xe_mmio_wait32(>->mmio, GUC_STATUS, GS_UKERNEL_MASK, + FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), + 100000, &guc_status, false); + if (ret) + xe_gt_warn(gt, "GuC not ready after disabling idle flow (GUC_STATUS: 0x%x)\n", + guc_status); + + ret = xe_mmio_wait32(>->mmio, GUC_SRAM_STATUS, GUC_SRAM_HANDLING_MASK, + 0, 5000, &sram_status, false); + if (ret) + xe_gt_warn(gt, "SRAM handling not complete (GUC_SRAM_STATUS: 0x%x)\n", + sram_status); +} + int xe_guc_reset(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); @@ 
-912,6 +961,9 @@ int xe_guc_reset(struct xe_guc *guc) if (IS_SRIOV_VF(gt_to_xe(gt))) return xe_gt_sriov_vf_bootstrap(gt); + if (XE_GT_WA(gt, 14025883347)) + guc_prevent_fw_dma_failure_on_reset(guc); + xe_mmio_write32(mmio, GDRST, GRDOM_GUC); ret = xe_mmio_wait32(mmio, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); @@ -1388,17 +1440,21 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } +#define MAX_RETRIES_ON_FLR 2 +#define MIN_SLEEP_MS_ON_FLR 256 + int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len, u32 *response_buf) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); struct xe_mmio *mmio = >->mmio; - u32 header, reply; struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; - bool lost = false; + unsigned int sleep_period_ms = 1; + unsigned int lost = 0; + u32 header; int ret; int i; @@ -1430,21 +1486,25 @@ retry: ret = xe_mmio_wait32(mmio, reply_reg, GUC_HXG_MSG_0_ORIGIN, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), - 50000, &reply, false); + 50000, &header, false); if (ret) { /* scratch registers might be cleared during FLR, try once more */ - if (!reply && !lost) { + if (!header) { + if (++lost > MAX_RETRIES_ON_FLR) { + xe_gt_err(gt, "GuC mmio request %#x: lost, too many retries %u\n", + request[0], lost); + return -ENOLINK; + } xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); - lost = true; + xe_sleep_relaxed_ms(MIN_SLEEP_MS_ON_FLR); goto retry; } timeout: xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", - request[0], reply); + request[0], header); return ret; } - header = xe_mmio_read32(mmio, reply_reg); if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { /* @@ -1480,6 +1540,8 @@ timeout: xe_gt_dbg(gt, "GuC mmio request %#x: retrying, reason %#x\n", request[0], reason); + + 
xe_sleep_exponential_ms(&sleep_period_ms, 256); goto retry; } @@ -1609,6 +1671,7 @@ void xe_guc_stop_prepare(struct xe_guc *guc) if (!IS_SRIOV_VF(guc_to_xe(guc))) { int err; + xe_guc_rc_disable(guc); err = xe_guc_pc_stop(&guc->pc); xe_gt_WARN(guc_to_gt(guc), err, "Failed to stop GuC PC: %pe\n", ERR_PTR(err)); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d04589140b77..496c6c77bee6 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -32,6 +32,7 @@ #include "xe_guc_tlb_inval.h" #include "xe_map.h" #include "xe_pm.h" +#include "xe_sleep.h" #include "xe_sriov_vf.h" #include "xe_trace_guc.h" @@ -254,6 +255,7 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2) +#define CTB_G2H_BUFFER_OFFSET (CTB_DESC_SIZE * 2) #define CTB_H2G_BUFFER_SIZE (SZ_4K) #define CTB_H2G_BUFFER_DWORDS (CTB_H2G_BUFFER_SIZE / sizeof(u32)) #define CTB_G2H_BUFFER_SIZE (SZ_128K) @@ -274,14 +276,18 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) */ long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct) { - BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4)); + BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4K)); return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ; } -static size_t guc_ct_size(void) +static size_t guc_h2g_size(void) { - return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE + - CTB_G2H_BUFFER_SIZE; + return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE; +} + +static size_t guc_g2h_size(void) +{ + return CTB_G2H_BUFFER_OFFSET + CTB_G2H_BUFFER_SIZE; } static void guc_ct_fini(struct drm_device *drm, void *arg) @@ -310,7 +316,8 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) struct xe_gt *gt = ct_to_gt(ct); int err; - xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); + xe_gt_assert(gt, !(guc_h2g_size() % PAGE_SIZE)); + xe_gt_assert(gt, !(guc_g2h_size() % PAGE_SIZE)); err = 
drmm_mutex_init(&xe->drm, &ct->lock); if (err) @@ -355,7 +362,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; - bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), + bo = xe_managed_bo_create_pin_map(xe, tile, guc_h2g_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE | @@ -363,7 +370,17 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) if (IS_ERR(bo)) return PTR_ERR(bo); - ct->bo = bo; + ct->ctbs.h2g.bo = bo; + + bo = xe_managed_bo_create_pin_map(xe, tile, guc_g2h_size(), + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ct->ctbs.g2h.bo = bo; return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); } @@ -388,7 +405,7 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) xe_assert(xe, !xe_guc_ct_enabled(ct)); if (IS_DGFX(xe)) { - ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); + ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->ctbs.h2g.bo); if (ret) return ret; } @@ -438,8 +455,7 @@ static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); - g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET + - CTB_H2G_BUFFER_SIZE); + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_G2H_BUFFER_OFFSET); } static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) @@ -448,8 +464,8 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) u32 desc_addr, ctb_addr, size; int err; - desc_addr = xe_bo_ggtt_addr(ct->bo); - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET; + desc_addr = xe_bo_ggtt_addr(ct->ctbs.h2g.bo); + ctb_addr = xe_bo_ggtt_addr(ct->ctbs.h2g.bo) + CTB_H2G_BUFFER_OFFSET; size = ct->ctbs.h2g.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, @@ -475,9 +491,8 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct 
*ct) u32 desc_addr, ctb_addr, size; int err; - desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET + - CTB_H2G_BUFFER_SIZE; + desc_addr = xe_bo_ggtt_addr(ct->ctbs.g2h.bo) + CTB_DESC_SIZE; + ctb_addr = xe_bo_ggtt_addr(ct->ctbs.g2h.bo) + CTB_G2H_BUFFER_OFFSET; size = ct->ctbs.g2h.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, @@ -604,9 +619,12 @@ static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); if (needs_register) { - xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); - guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); - guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + xe_map_memset(xe, &ct->ctbs.h2g.bo->vmap, 0, 0, + xe_bo_size(ct->ctbs.h2g.bo)); + xe_map_memset(xe, &ct->ctbs.g2h.bo->vmap, 0, 0, + xe_bo_size(ct->ctbs.g2h.bo)); + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->ctbs.h2g.bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->ctbs.g2h.bo->vmap); err = guc_ct_ctb_h2g_register(ct); if (err) @@ -623,7 +641,7 @@ static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) ct->ctbs.h2g.info.broken = false; ct->ctbs.g2h.info.broken = false; /* Skip everything in H2G buffer */ - xe_map_memset(xe, &ct->bo->vmap, CTB_H2G_BUFFER_OFFSET, 0, + xe_map_memset(xe, &ct->ctbs.h2g.bo->vmap, CTB_H2G_BUFFER_OFFSET, 0, CTB_H2G_BUFFER_SIZE); } @@ -643,7 +661,7 @@ static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) spin_lock_irq(&ct->dead.lock); if (ct->dead.reason) { ct->dead.reason |= (1 << CT_DEAD_STATE_REARM); - queue_work(system_unbound_wq, &ct->dead.worker); + queue_work(system_dfl_wq, &ct->dead.worker); } spin_unlock_irq(&ct->dead.lock); #endif @@ -921,22 +939,22 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 full_len; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); - u32 desc_status; full_len = len + GUC_CTB_HDR_LEN; 
lockdep_assert_held(&ct->lock); xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN); - desc_status = desc_read(xe, h2g, status); - if (desc_status) { - xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status); - goto corrupted; - } - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { u32 desc_tail = desc_read(xe, h2g, tail); u32 desc_head = desc_read(xe, h2g, head); + u32 desc_status; + + desc_status = desc_read(xe, h2g, status); + if (desc_status) { + xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status); + goto corrupted; + } if (tail != desc_tail) { desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH); @@ -1005,8 +1023,15 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); - trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, - desc_read(xe, h2g, head), h2g->info.tail); + /* + * desc_read() performs a VRAM read which serializes the CPU and drains + * posted writes on dGPU platforms. Tracepoints evaluate arguments even + * when disabled, so guard the event to avoid adding µs-scale latency to + * the fast H2G submission path when tracing is not active. 
+ */ + if (trace_xe_guc_ctb_h2g_enabled()) + trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, + desc_read(xe, h2g, head), h2g->info.tail); return 0; @@ -1101,7 +1126,8 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct); */ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, u32 g2h_len, struct g2h_fence *g2h_fence, - unsigned int *sleep_period_ms) + unsigned int *sleep_period_ms, + unsigned int *sleep_total_ms) { struct xe_device *xe = ct_to_xe(ct); @@ -1115,17 +1141,15 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, if (!h2g_has_room(ct, len + GUC_CTB_HDR_LEN)) { struct guc_ctb *h2g = &ct->ctbs.h2g; - if (*sleep_period_ms == 1024) + if (*sleep_total_ms > 1000) return false; trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); - msleep(*sleep_period_ms); - *sleep_period_ms <<= 1; + *sleep_total_ms += xe_sleep_exponential_ms(sleep_period_ms, 64); } else { - struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; int ret; @@ -1147,7 +1171,7 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, ret = dequeue_one_g2h(ct); if (ret < 0) { if (ret != -ECANCELED) - xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)", + xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)\n", ERR_PTR(ret)); return false; } @@ -1161,6 +1185,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, { struct xe_gt *gt = ct_to_gt(ct); unsigned int sleep_period_ms = 1; + unsigned int sleep_total_ms = 0; int ret; xe_gt_assert(gt, !g2h_len || !g2h_fence); @@ -1173,7 +1198,7 @@ try_again: if (unlikely(ret == -EBUSY)) { if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence, - &sleep_period_ms)) + &sleep_period_ms, &sleep_total_ms)) goto broken; goto try_again; } @@ -1322,7 +1347,7 @@ retry_same_fence: */ mutex_lock(&ct->lock); if (!ret) { - xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done 
%s", + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s\n", g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done)); xa_erase(&ct->fence_lookup, g2h_fence.seqno); mutex_unlock(&ct->lock); @@ -1832,7 +1857,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; default: - xe_gt_warn(gt, "NOT_POSSIBLE"); + xe_gt_warn(gt, "NOT_POSSIBLE\n"); } if (ret) { @@ -1935,7 +1960,7 @@ static void receive_g2h(struct xe_guc_ct *ct) mutex_unlock(&ct->lock); if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { - xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret); + xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d\n", ret); CT_DEAD(ct, NULL, G2H_RECV); kick_reset(ct); } @@ -1961,8 +1986,9 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo if (!snapshot) return NULL; - if (ct->bo && want_ctb) { - snapshot->ctb_size = xe_bo_size(ct->bo); + if (ct->ctbs.h2g.bo && ct->ctbs.g2h.bo && want_ctb) { + snapshot->ctb_size = xe_bo_size(ct->ctbs.h2g.bo) + + xe_bo_size(ct->ctbs.g2h.bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? 
GFP_ATOMIC : GFP_KERNEL); } @@ -2010,8 +2036,13 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h); } - if (ct->bo && snapshot->ctb) - xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size); + if (ct->ctbs.h2g.bo && ct->ctbs.g2h.bo && snapshot->ctb) { + xe_map_memcpy_from(xe, snapshot->ctb, &ct->ctbs.h2g.bo->vmap, 0, + xe_bo_size(ct->ctbs.h2g.bo)); + xe_map_memcpy_from(xe, snapshot->ctb + xe_bo_size(ct->ctbs.h2g.bo), + &ct->ctbs.g2h.bo->vmap, 0, + xe_bo_size(ct->ctbs.g2h.bo)); + } return snapshot; } @@ -2165,7 +2196,7 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso spin_unlock_irqrestore(&ct->dead.lock, flags); - queue_work(system_unbound_wq, &(ct)->dead.worker); + queue_work(system_dfl_wq, &(ct)->dead.worker); } static void ct_dead_print(struct xe_dead_ct *dead) diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 09d7ff1ef42a..46ad1402347d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -39,6 +39,8 @@ struct guc_ctb_info { * struct guc_ctb - GuC command transport buffer (CTB) */ struct guc_ctb { + /** @bo: Xe BO for CTB */ + struct xe_bo *bo; /** @desc: dma buffer map for CTB descriptor */ struct iosys_map desc; /** @cmds: dma buffer map for CTB commands */ @@ -126,8 +128,6 @@ struct xe_fast_req_fence { * for the H2G and G2H requests sent and received through the buffers. 
*/ struct xe_guc_ct { - /** @bo: Xe BO for CT */ - struct xe_bo *bo; /** @lock: protects everything in CT layer */ struct mutex lock; /** @fast_lock: protects G2H channel and credits */ diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index a33ea288b907..bb8f71d38611 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -261,7 +261,8 @@ struct xe_guc_pagefault_desc { #define PFD_ACCESS_TYPE GENMASK(1, 0) #define PFD_FAULT_TYPE GENMASK(3, 2) #define PFD_VFID GENMASK(9, 4) -#define PFD_RSVD_1 GENMASK(11, 10) +#define PFD_RSVD_1 BIT(10) +#define PFD_PREFETCH BIT(11) /* Only valid on Xe3+, reserved on prior platforms */ #define PFD_VIRTUAL_ADDR_LO GENMASK(31, 12) #define PFD_VIRTUAL_ADDR_LO_SHIFT 12 @@ -281,7 +282,7 @@ struct xe_guc_pagefault_reply { u32 dw1; #define PFR_VFID GENMASK(5, 0) -#define PFR_RSVD_1 BIT(6) +#define PFR_PREFETCH BIT(6) /* Only valid on Xe3+, reserved on prior platforms */ #define PFR_ENG_INSTANCE GENMASK(12, 7) #define PFR_ENG_CLASS GENMASK(15, 13) #define PFR_PDATA GENMASK(31, 16) diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 1b05bb60c1c7..4649a260755e 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -13,9 +13,13 @@ struct drm_printer; struct xe_device; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) -#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_8M +#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_16M #define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_1M #define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_2M +#elif IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_8M +#define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_1M +#define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_1M #else #define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_64K #define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_16K diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c index 719a18187a31..607e32392f46 100644 
--- a/drivers/gpu/drm/xe/xe_guc_pagefault.c +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -8,15 +8,18 @@ #include "xe_guc_ct.h" #include "xe_guc_pagefault.h" #include "xe_pagefault.h" +#include "xe_pagefault_types.h" static void guc_ack_fault(struct xe_pagefault *pf, int err) { u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]); + u32 prefetch = FIELD_GET(PFD_PREFETCH, pf->producer.msg[2]); u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]); u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]); u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) | (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) << PFD_PDATA_HI_SHIFT); + u32 asid = FIELD_GET(PFD_ASID, pf->producer.msg[1]); u32 action[] = { XE_GUC_ACTION_PAGE_FAULT_RES_DESC, @@ -24,9 +27,10 @@ static void guc_ack_fault(struct xe_pagefault *pf, int err) FIELD_PREP(PFR_SUCCESS, !!err) | FIELD_PREP(PFR_REPLY, PFR_ACCESS) | FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | - FIELD_PREP(PFR_ASID, pf->consumer.asid), + FIELD_PREP(PFR_ASID, asid), FIELD_PREP(PFR_VFID, vfid) | + FIELD_PREP(PFR_PREFETCH, err ? prefetch : 0) | FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) | FIELD_PREP(PFR_ENG_CLASS, engine_class) | FIELD_PREP(PFR_PDATA, pdata), @@ -75,12 +79,16 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << PFD_VIRTUAL_ADDR_LO_SHIFT); pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); - pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]); - pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]); + pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]) | + (FIELD_GET(PFD_PREFETCH, msg[2]) ? 
XE_PAGEFAULT_ACCESS_PREFETCH : 0); if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) - pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK; + pf.consumer.fault_type_level = XE_PAGEFAULT_TYPE_LEVEL_NACK; else - pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]); + pf.consumer.fault_type_level = + FIELD_PREP(XE_PAGEFAULT_LEVEL_MASK, + FIELD_GET(PFD_FAULT_LEVEL, msg[0])) | + FIELD_PREP(XE_PAGEFAULT_TYPE_MASK, + FIELD_GET(PFD_FAULT_TYPE, msg[2])); pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]); pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 5e5495a39a3c..21fe73ab4583 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -92,6 +92,17 @@ * Render-C states is also a GuC PC feature that is now enabled in Xe for * all platforms. * + * Implementation details: + * ----------------------- + * The implementation for GuC Power Management features is split as follows: + * + * xe_guc_rc: Logic for handling GuC RC + * xe_gt_idle: Host side logic for RC6 and Coarse Power gating (CPG) + * xe_guc_pc: Logic for all other SLPC related features + * + * There is some cross interaction between these where host C6 will need to be + * enabled when we plan to skip GuC RC. Also, the GuC RC mode is currently + * overridden through 0x3003 which is an SLPC H2G call. */ static struct xe_guc *pc_to_guc(struct xe_guc_pc *pc) @@ -253,20 +264,35 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) return ret; } -static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) +/** + * xe_guc_pc_action_set_param() - Set value of SLPC param + * @pc: Xe_GuC_PC instance + * @id: Param id + * @value: Value to set + * + * This function can be used to set any SLPC param. 
+ * + * Return: 0 on Success + */ +int xe_guc_pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) { - struct xe_guc_ct *ct = pc_to_ct(pc); - u32 action[] = { - GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, - mode, - }; - int ret; + xe_device_assert_mem_access(pc_to_xe(pc)); + return pc_action_set_param(pc, id, value); +} - ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) - xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", - mode, ERR_PTR(ret)); - return ret; +/** + * xe_guc_pc_action_unset_param() - Revert to default value + * @pc: Xe_GuC_PC instance + * @id: Param id + * + * This function can be used to revert any SLPC param to its default value. + * + * Return: 0 on Success + */ +int xe_guc_pc_action_unset_param(struct xe_guc_pc *pc, u8 id) +{ + xe_device_assert_mem_access(pc_to_xe(pc)); + return pc_action_unset_param(pc, id); } static u32 decode_freq(u32 raw) @@ -1050,55 +1076,6 @@ int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) return ret; } -/** - * xe_guc_pc_gucrc_disable - Disable GuC RC - * @pc: Xe_GuC_PC instance - * - * Disables GuC RC by taking control of RC6 back from GuC. - * - * Return: 0 on success, negative error code on error. - */ -int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) -{ - struct xe_device *xe = pc_to_xe(pc); - struct xe_gt *gt = pc_to_gt(pc); - int ret = 0; - - if (xe->info.skip_guc_pc) - return 0; - - ret = pc_action_setup_gucrc(pc, GUCRC_HOST_CONTROL); - if (ret) - return ret; - - return xe_gt_idle_disable_c6(gt); -} - -/** - * xe_guc_pc_override_gucrc_mode - override GUCRC mode - * @pc: Xe_GuC_PC instance - * @mode: new value of the mode. 
- * - * Return: 0 on success, negative error code on error - */ -int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode) -{ - guard(xe_pm_runtime)(pc_to_xe(pc)); - return pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); -} - -/** - * xe_guc_pc_unset_gucrc_mode - unset GUCRC mode override - * @pc: Xe_GuC_PC instance - * - * Return: 0 on success, negative error code on error - */ -int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc) -{ - guard(xe_pm_runtime)(pc_to_xe(pc)); - return pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); -} - static void pc_init_pcode_freq(struct xe_guc_pc *pc) { u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); @@ -1247,9 +1224,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) return -ETIMEDOUT; if (xe->info.skip_guc_pc) { - if (xe->info.platform != XE_PVC) - xe_gt_idle_enable_c6(gt); - /* Request max possible since dynamic freq mgmt is not enabled */ pc_set_cur_freq(pc, UINT_MAX); return 0; @@ -1291,15 +1265,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (ret) return ret; - if (xe->info.platform == XE_PVC) { - xe_guc_pc_gucrc_disable(pc); - return 0; - } - - ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); - if (ret) - return ret; - /* Enable SLPC Optimized Strategy for compute */ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); @@ -1319,10 +1284,8 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); - if (xe->info.skip_guc_pc) { - xe_gt_idle_disable_c6(pc_to_gt(pc)); + if (xe->info.skip_guc_pc) return 0; - } mutex_lock(&pc->freq_lock); pc->freq_ready = false; @@ -1343,8 +1306,7 @@ static void xe_guc_pc_fini_hw(void *arg) if (xe_device_wedged(xe)) return; - CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); - xe_guc_pc_gucrc_disable(pc); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FW_GT); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ diff --git 
a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0e31396f103c..0678a4e787b3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -9,16 +9,14 @@ #include struct xe_guc_pc; -enum slpc_gucrc_mode; struct drm_printer; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); -int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); -int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); -int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); +int xe_guc_pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value); +int xe_guc_pc_action_unset_param(struct xe_guc_pc *pc, u8 id); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_guc_rc.c b/drivers/gpu/drm/xe/xe_guc_rc.c new file mode 100644 index 000000000000..99fa127b261f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_rc.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2026 Intel Corporation + */ + +#include + +#include "abi/guc_actions_slpc_abi.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_idle.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_pc.h" +#include "xe_guc_rc.h" +#include "xe_pm.h" + +/** + * DOC: GuC RC (Render C-states) + * + * GuC handles the GT transition to deeper C-states in conjunction with Pcode. + * GuC RC can be enabled independently of the frequency component in SLPC, + * which is also controlled by GuC. + * + * This file will contain all H2G related logic for handling Render C-states. + * There are some calls to xe_gt_idle, where we enable host C6 when GuC RC is + * skipped. 
GuC RC is mostly independent of xe_guc_pc with the exception of + * functions that override the mode for which we have to rely on the SLPC H2G + * calls. + */ + +static int guc_action_setup_gucrc(struct xe_guc *guc, u32 control) +{ + u32 action[] = { + GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, + control, + }; + int ret; + + ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); + if (ret && !(xe_device_wedged(guc_to_xe(guc)) && ret == -ECANCELED)) + xe_gt_err(guc_to_gt(guc), + "GuC RC setup %s(%u) failed (%pe)\n", + control == GUCRC_HOST_CONTROL ? "HOST_CONTROL" : + control == GUCRC_FIRMWARE_CONTROL ? "FIRMWARE_CONTROL" : + "UNKNOWN", control, ERR_PTR(ret)); + return ret; +} + +/** + * xe_guc_rc_disable() - Disable GuC RC + * @guc: Xe GuC instance + * + * Disables GuC RC by taking control of RC6 back from GuC. + */ +void xe_guc_rc_disable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + if (!xe->info.skip_guc_pc && xe->info.platform != XE_PVC) + if (guc_action_setup_gucrc(guc, GUCRC_HOST_CONTROL)) + return; + + xe_gt_WARN_ON(gt, xe_gt_idle_disable_c6(gt)); +} + +static void xe_guc_rc_fini_hw(void *arg) +{ + struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + if (xe_device_wedged(xe)) + return; + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + xe_guc_rc_disable(guc); +} + +/** + * xe_guc_rc_init() - Init GuC RC + * @guc: Xe GuC instance + * + * Add callback action for GuC RC + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_rc_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, xe_device_uc_enabled(xe)); + + return devm_add_action_or_reset(xe->drm.dev, xe_guc_rc_fini_hw, guc); +} + +/** + * xe_guc_rc_enable() - Enable GuC RC feature if applicable + * @guc: Xe GuC instance + * + * Enables GuC RC feature. 
+ * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_rc_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, xe_device_uc_enabled(xe)); + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) + return -ETIMEDOUT; + + if (xe->info.platform == XE_PVC) { + xe_guc_rc_disable(guc); + return 0; + } + + if (xe->info.skip_guc_pc) { + xe_gt_idle_enable_c6(gt); + return 0; + } + + return guc_action_setup_gucrc(guc, GUCRC_FIRMWARE_CONTROL); +} diff --git a/drivers/gpu/drm/xe/xe_guc_rc.h b/drivers/gpu/drm/xe/xe_guc_rc.h new file mode 100644 index 000000000000..b083fc364dd4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_rc.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_GUC_RC_H_ +#define _XE_GUC_RC_H_ + +struct xe_guc; +enum slpc_gucrc_mode; + +int xe_guc_rc_init(struct xe_guc *guc); +int xe_guc_rc_enable(struct xe_guc *guc); +void xe_guc_rc_disable(struct xe_guc *guc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 799ef9f48003..ca7aa4f358d0 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include @@ -42,6 +40,7 @@ #include "xe_pm.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" +#include "xe_sleep.h" #include "xe_trace.h" #include "xe_uc_fw.h" #include "xe_vm.h" @@ -556,6 +555,72 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq, *next; + LIST_HEAD(tmp); + + 
xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + + mutex_lock(&group->list_lock); + + /* + * Stop all future queues from executing while the group is stopped. + */ + group->stopped = true; + + list_for_each_entry_safe(eq, next, &group->list, multi_queue.link) + /* + * Refcount prevents an attempted removal from &group->list, + * temporary list allows safe iteration after dropping + * &group->list_lock. + */ + if (xe_exec_queue_get_unless_zero(eq)) + list_move_tail(&eq->multi_queue.link, &tmp); + + mutex_unlock(&group->list_lock); + + /* We cannot stop under list lock without getting inversions */ + xe_sched_submission_stop(&primary->guc->sched); + list_for_each_entry(eq, &tmp, multi_queue.link) + xe_sched_submission_stop(&eq->guc->sched); + + mutex_lock(&group->list_lock); + list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) { + /* + * Corner where we got banned while stopping and not on + * &group->list + */ + if (READ_ONCE(group->banned)) + xe_guc_exec_queue_trigger_cleanup(eq); + + list_move_tail(&eq->multi_queue.link, &group->list); + xe_exec_queue_put(eq); + } + mutex_unlock(&group->list_lock); +} + +static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + + xe_sched_submission_start(&primary->guc->sched); + + mutex_lock(&group->list_lock); + group->stopped = false; + list_for_each_entry(eq, &group->list, multi_queue.link) + xe_sched_submission_start(&eq->guc->sched); + mutex_unlock(&group->list_lock); +} + static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q) { struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); @@ -738,6 +803,7 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, { struct 
xe_exec_queue_group *group = q->multi_queue.group; struct xe_device *xe = guc_to_xe(guc); + enum xe_multi_queue_priority priority; long ret; /* @@ -761,7 +827,10 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, return; } - xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority); + scoped_guard(spinlock, &q->multi_queue.lock) + priority = q->multi_queue.priority; + + xe_lrc_set_multi_queue_priority(q->lrc[0], priority); xe_guc_exec_queue_group_cgp_update(xe, q); WRITE_ONCE(group->sync_pending, true); @@ -962,24 +1031,6 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q) return (WQ_SIZE - q->guc->wqi_tail); } -static inline void relaxed_ms_sleep(unsigned int delay_ms) -{ - unsigned long min_us, max_us; - - if (!delay_ms) - return; - - if (delay_ms > 20) { - msleep(delay_ms); - return; - } - - min_us = mul_u32_u32(delay_ms, 1000); - max_us = min_us + 500; - - usleep_range(min_us, max_us); -} - static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) { struct xe_guc *guc = exec_queue_to_guc(q); @@ -998,10 +1049,7 @@ try_again: return -ENODEV; } - msleep(sleep_period_ms); - sleep_total_ms += sleep_period_ms; - if (sleep_period_ms < 64) - sleep_period_ms <<= 1; + sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64); goto try_again; } } @@ -1414,7 +1462,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); struct drm_sched_job *tmp_job; - struct xe_exec_queue *q = job->q; + struct xe_exec_queue *q = job->q, *primary; struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_guc *guc = exec_queue_to_guc(q); const char *process_name = "no process"; @@ -1425,6 +1473,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + primary = xe_exec_queue_multi_queue_primary(q); + /* * TDR has fired before free job worker. Common if exec queue * immediately closed after last fence signaled. 
Add back to pending @@ -1436,7 +1486,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ - xe_sched_submission_stop(sched); + if (xe_exec_queue_is_multi_queue(q)) + xe_guc_exec_queue_group_stop(q); + else + xe_sched_submission_stop(sched); /* Must check all state after stopping scheduler */ skip_timeout_check = exec_queue_reset(q) || @@ -1451,14 +1504,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (xe_exec_queue_is_lr(q)) xe_gt_assert(guc_to_gt(guc), skip_timeout_check); - /* - * FIXME: In multi-queue scenario, the TDR must ensure that the whole - * multi-queue group is off the HW before signaling the fences to avoid - * possible memory corruptions. This means disabling scheduling on the - * primary queue before or during the secondary queue's TDR. Need to - * implement this in least obtrusive way. - */ - /* * If devcoredump not captured and GuC capture for the job is not ready * do manual capture first and decide later if we need to use it @@ -1485,10 +1530,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) set_exec_queue_banned(q); /* Kick job / queue off hardware */ - if (!wedged && (exec_queue_enabled(q) || exec_queue_pending_disable(q))) { + if (!wedged && (exec_queue_enabled(primary) || + exec_queue_pending_disable(primary))) { int ret; - if (exec_queue_reset(q)) + if (exec_queue_reset(primary)) err = -EIO; if (xe_uc_fw_is_running(&guc->fw)) { @@ -1497,8 +1543,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * modifying state */ ret = wait_event_timeout(guc->ct.wq, - (!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q)) || + (!exec_queue_pending_enable(primary) && + !exec_queue_pending_disable(primary)) || xe_guc_read_stopped(guc) || vf_recovery(guc), HZ * 5); if (vf_recovery(guc)) @@ -1506,7 +1552,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!ret || xe_guc_read_stopped(guc)) goto 
trigger_reset; - disable_scheduling(q, skip_timeout_check); + disable_scheduling(primary, skip_timeout_check); } /* @@ -1520,7 +1566,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) smp_rmb(); ret = wait_event_timeout(guc->ct.wq, !xe_uc_fw_is_running(&guc->fw) || - !exec_queue_pending_disable(q) || + !exec_queue_pending_disable(primary) || xe_guc_read_stopped(guc) || vf_recovery(guc), HZ * 5); if (vf_recovery(guc)) @@ -1530,11 +1576,11 @@ trigger_reset: if (!ret) xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond, guc_id=%d", - q->guc->id); - xe_devcoredump(q, job, + primary->guc->id); + xe_devcoredump(primary, job, "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", - q->guc->id, ret, xe_guc_read_stopped(guc)); - xe_gt_reset_async(q->gt); + primary->guc->id, ret, xe_guc_read_stopped(guc)); + xe_gt_reset_async(primary->gt); xe_sched_tdr_queue_imm(sched); goto rearm; } @@ -1580,12 +1626,13 @@ trigger_reset: drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL) xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED); - xe_sched_submission_start(sched); - - if (xe_exec_queue_is_multi_queue(q)) + if (xe_exec_queue_is_multi_queue(q)) { + xe_guc_exec_queue_group_start(q); xe_guc_exec_queue_group_trigger_cleanup(q); - else + } else { + xe_sched_submission_start(sched); xe_guc_exec_queue_trigger_cleanup(q); + } /* * We want the job added back to the pending list so it gets freed; this @@ -1599,7 +1646,10 @@ rearm: * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. 
*/ - xe_sched_submission_start(sched); + if (xe_exec_queue_is_multi_queue(q)) + xe_guc_exec_queue_group_start(q); + else + xe_sched_submission_start(sched); handle_vf_resume: return DRM_GPU_SCHED_STAT_NO_HANG; } @@ -1762,7 +1812,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) since_resume_ms; if (wait_ms > 0 && q->guc->resume_time) - relaxed_ms_sleep(wait_ms); + xe_sleep_relaxed_ms(wait_ms); set_exec_queue_suspended(q); disable_scheduling(q, false); @@ -1965,6 +2015,8 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) INIT_LIST_HEAD(&q->multi_queue.link); mutex_lock(&group->list_lock); + if (group->stopped) + WRITE_ONCE(q->guc->sched.base.pause_submit, true); list_add_tail(&q->multi_queue.link, &group->list); mutex_unlock(&group->list_lock); } @@ -2111,15 +2163,22 @@ static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); - if (q->multi_queue.priority == priority || - exec_queue_killed_or_banned_or_wedged(q)) + if (exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc_obj(*msg); if (!msg) return -ENOMEM; - q->multi_queue.priority = priority; + scoped_guard(spinlock, &q->multi_queue.lock) { + if (q->multi_queue.priority == priority) { + kfree(msg); + return 0; + } + + q->multi_queue.priority = priority; + } + guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); return 0; @@ -2206,6 +2265,14 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); } +static bool guc_exec_queue_active(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + + return exec_queue_enabled(primary) && + !exec_queue_pending_disable(primary); +} + /* * All of these functions are an abstraction layer which other parts of Xe can * use to trap into the GuC backend. 
All of these functions, aside from init, @@ -2225,6 +2292,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, .reset_status = guc_exec_queue_reset_status, + .active = guc_exec_queue_active, }; static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index 774467befbb9..ced58f46f846 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -6,15 +6,19 @@ #include "abi/guc_actions_abi.h" #include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_exec_queue_types.h" #include "xe_gt_stats.h" #include "xe_gt_types.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_guc_exec_queue_types.h" #include "xe_guc_tlb_inval.h" #include "xe_force_wake.h" #include "xe_mmio.h" #include "xe_sa.h" #include "xe_tlb_inval.h" +#include "xe_vm.h" #include "regs/xe_guc_regs.h" @@ -111,6 +115,38 @@ static int send_page_reclaim(struct xe_guc *guc, u32 seqno, G2H_LEN_DW_PAGE_RECLAMATION, 1); } +static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end) +{ + u64 orig_start = *start; + u64 length = *end - *start; + u64 align; + + if (length < SZ_4K) + length = SZ_4K; + + align = roundup_pow_of_two(length); + *start = ALIGN_DOWN(*start, align); + *end = ALIGN(*end, align); + length = align; + while (*start + length < *end) { + length <<= 1; + *start = ALIGN_DOWN(orig_start, length); + } + + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + *start = ALIGN_DOWN(orig_start, length); + } + + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(*start, length)); + + return length; +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow. 
* Note that roundup_pow_of_two() operates on unsigned long, @@ -118,19 +154,21 @@ static int send_page_reclaim(struct xe_guc *guc, u32 seqno, */ #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) -static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, - u64 start, u64 end, u32 asid, +static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start, + u64 end, u32 id, u32 type, struct drm_suballoc *prl_sa) { #define MAX_TLB_INVALIDATION_LEN 7 - struct xe_guc *guc = tlb_inval->private; struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_TLB_INVALIDATION_LEN]; u64 length = end - start; int len = 0, err; - if (guc_to_xe(guc)->info.force_execlist) - return -ECANCELED; + xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE && + !xe->info.has_ctx_tlb_inval) || + (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX && + xe->info.has_ctx_tlb_inval)); action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID; @@ -138,55 +176,150 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { - u64 orig_start = start; - u64 align; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. 
- */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(start, align); - end = ALIGN(end, align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(orig_start, length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(orig_start, length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, - ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); + u64 normalize_len = normalize_invalidation_range(gt, &start, + &end); + bool need_flush = !prl_sa && + seqno != TLB_INVALIDATION_SEQNO_INVALID; /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ - action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa); - action[len++] = asid; + action[len++] = MAKE_INVAL_OP_FLUSH(type, need_flush); + action[len++] = id; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); + action[len++] = ilog2(normalize_len) - ilog2(SZ_4K); } xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); +#undef MAX_TLB_INVALIDATION_LEN err = send_tlb_inval(guc, action, len); - if (!err && prl_sa) + if (!err && prl_sa) { + xe_gt_assert(gt, seqno != TLB_INVALIDATION_SEQNO_INVALID); err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa)); + } + return err; +} + +static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid, + struct drm_suballoc *prl_sa) +{ + struct xe_guc *guc = tlb_inval->private; + + lockdep_assert_held(&tlb_inval->seqno_lock); + + if (guc_to_xe(guc)->info.force_execlist) + return -ECANCELED; + + return send_tlb_inval_ppgtt(guc, seqno, start, end, asid, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE, prl_sa); +} + +static int 
send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid, + struct drm_suballoc *prl_sa) +{ + struct xe_guc *guc = tlb_inval->private; + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q, *next, *last_q = NULL; + struct xe_vm *vm; + LIST_HEAD(tlb_inval_list); + int err = 0, id = guc_to_gt(guc)->info.id; + + lockdep_assert_held(&tlb_inval->seqno_lock); + + if (xe->info.force_execlist) + return -ECANCELED; + + vm = xe_device_asid_to_vm(xe, asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + down_read(&vm->exec_queues.lock); + + /* + * XXX: Randomly picking a threshold for now. This will need to be + * tuned based on expected UMD queue counts and performance profiling. + */ +#define EXEC_QUEUE_COUNT_FULL_THRESHOLD 8 + if (vm->exec_queues.count[id] >= EXEC_QUEUE_COUNT_FULL_THRESHOLD) { + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + err = send_tlb_inval(guc, action, ARRAY_SIZE(action)); + goto err_unlock; + } +#undef EXEC_QUEUE_COUNT_FULL_THRESHOLD + + /* + * Move exec queues to a temporary list to issue invalidations. The exec + * queue must active and a reference must be taken to prevent concurrent + * deregistrations. + * + * List modification is safe because we hold 'vm->exec_queues.lock' for + * reading, which prevents external modifications. Using a per-GT list + * is also safe since 'tlb_inval->seqno_lock' ensures no other GT users + * can enter this code path. + */ + list_for_each_entry_safe(q, next, &vm->exec_queues.list[id], + vm_exec_queue_link) { + if (q->ops->active(q) && xe_exec_queue_get_unless_zero(q)) { + last_q = q; + list_move_tail(&q->vm_exec_queue_link, &tlb_inval_list); + } + } + + if (!last_q) { + /* + * We can't break fence ordering for TLB invalidation jobs, if + * TLB invalidations are inflight issue a dummy invalidation to + * maintain ordering. 
Nor can we move safely the seqno_recv when + * returning -ECANCELED if TLB invalidations are in flight. Use + * GGTT invalidation as dummy invalidation given ASID + * invalidations are unsupported here. + */ + if (xe_tlb_inval_idle(tlb_inval)) + err = -ECANCELED; + else + err = send_tlb_inval_ggtt(tlb_inval, seqno); + goto err_unlock; + } + + list_for_each_entry_safe(q, next, &tlb_inval_list, vm_exec_queue_link) { + struct drm_suballoc *__prl_sa = NULL; + int __seqno = TLB_INVALIDATION_SEQNO_INVALID; + u32 type = XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX; + + xe_assert(xe, q->vm == vm); + + if (err) + goto unref; + + if (last_q == q) { + __prl_sa = prl_sa; + __seqno = seqno; + } + + err = send_tlb_inval_ppgtt(guc, __seqno, start, end, + q->guc->id, type, __prl_sa); + +unref: + /* + * Must always return exec queue to original list / drop + * reference + */ + list_move_tail(&q->vm_exec_queue_link, + &vm->exec_queues.list[id]); + xe_exec_queue_put(q); + } + +err_unlock: + up_read(&vm->exec_queues.lock); + xe_vm_put(vm); + return err; } @@ -217,10 +350,19 @@ static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) return hw_tlb_timeout + 2 * delay; } -static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { +static const struct xe_tlb_inval_ops guc_tlb_inval_asid_ops = { .all = send_tlb_inval_all, .ggtt = send_tlb_inval_ggtt, - .ppgtt = send_tlb_inval_ppgtt, + .ppgtt = send_tlb_inval_asid_ppgtt, + .initialized = tlb_inval_initialized, + .flush = tlb_inval_flush, + .timeout_delay = tlb_inval_timeout_delay, +}; + +static const struct xe_tlb_inval_ops guc_tlb_inval_ctx_ops = { + .ggtt = send_tlb_inval_ggtt, + .all = send_tlb_inval_all, + .ppgtt = send_tlb_inval_ctx_ppgtt, .initialized = tlb_inval_initialized, .flush = tlb_inval_flush, .timeout_delay = tlb_inval_timeout_delay, @@ -237,8 +379,14 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { void xe_guc_tlb_inval_init_early(struct xe_guc *guc, struct xe_tlb_inval *tlb_inval) { + struct xe_device *xe = 
guc_to_xe(guc); + tlb_inval->private = guc; - tlb_inval->ops = &guc_tlb_inval_ops; + + if (xe->info.has_ctx_tlb_inval) + tlb_inval->ops = &guc_tlb_inval_ctx_ops; + else + tlb_inval->ops = &guc_tlb_inval_asid_ops; } /** diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 688d645e0e73..ea3ad600d7c7 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -408,7 +408,8 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) }, }; - xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), + &hwe->reg_lrc, true); } static void @@ -472,7 +473,8 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) }, }; - xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), + &hwe->reg_sr, false); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2ef33dfbe3a2..4c2b113364d3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -51,7 +51,8 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group) return ERR_PTR(-ENOMEM); - group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", WQ_PERCPU, + 0); if (!group->resume_wq) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 1e1fb72e49bf..1deb812fe01d 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -27,7 +27,7 @@ #include "regs/xe_i2c_regs.h" #include "regs/xe_irq_regs.h" -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_i2c.h" #include "xe_mmio.h" #include "xe_sriov.h" diff --git a/drivers/gpu/drm/xe/xe_lmtt.c 
b/drivers/gpu/drm/xe/xe_lmtt.c index 8163c3a8fc87..0c726eda9390 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -57,6 +57,23 @@ static u64 lmtt_page_size(struct xe_lmtt *lmtt) return BIT_ULL(lmtt->ops->lmtt_pte_shift(0)); } +/** + * xe_lmtt_page_size() - Get LMTT page size. + * @lmtt: the &xe_lmtt + * + * This function shall be called only by PF. + * + * Return: LMTT page size. + */ +u64 xe_lmtt_page_size(struct xe_lmtt *lmtt) +{ + lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, xe_device_has_lmtt(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, lmtt->ops); + + return lmtt_page_size(lmtt); +} + static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level) { unsigned int num_entries = level ? lmtt->ops->lmtt_pte_num(level) : 0; diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index 75a234fbf367..8fa387b38c52 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -20,6 +20,7 @@ int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size); +u64 xe_lmtt_page_size(struct xe_lmtt *lmtt); #else static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; } static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b0f037bc227f..fcdbd403fa3c 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -113,13 +113,17 @@ size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class) /* Engine context image */ switch (class) { case XE_ENGINE_CLASS_RENDER: - if (GRAPHICS_VER(xe) >= 20) + if (GRAPHICS_VERx100(xe) >= 3510) + size += 7 * SZ_4K; + else if (GRAPHICS_VER(xe) >= 20) size += 3 * SZ_4K; else size += 13 
* SZ_4K; break; case XE_ENGINE_CLASS_COMPUTE: - if (GRAPHICS_VER(xe) >= 20) + if (GRAPHICS_VERx100(xe) >= 3510) + size += 5 * SZ_4K; + else if (GRAPHICS_VER(xe) >= 20) size += 2 * SZ_4K; else size += 13 * SZ_4K; @@ -711,12 +715,13 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset #define __xe_lrc_regs_offset xe_lrc_regs_offset -#define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_CTX_JOB_TIMESTAMP_OFFSET 512 #define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_SEQNO_OFFSET 0 +#define LRC_START_SEQNO_OFFSET (LRC_SEQNO_OFFSET + 8) + u32 xe_lrc_regs_offset(struct xe_lrc *lrc) { return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; @@ -743,14 +748,12 @@ size_t xe_lrc_skip_size(struct xe_device *xe) static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) { - /* The seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; + return LRC_SEQNO_OFFSET; } static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; + return LRC_START_SEQNO_OFFSET; } static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) @@ -801,10 +804,11 @@ static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } -#define DECL_MAP_ADDR_HELPERS(elem) \ +#define DECL_MAP_ADDR_HELPERS(elem, bo_expr) \ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ - struct iosys_map map = lrc->bo->vmap; \ + struct xe_bo *bo = (bo_expr); \ + struct iosys_map map = bo->vmap; \ \ xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ @@ -812,20 
+816,22 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ } \ static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ { \ - return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ + struct xe_bo *bo = (bo_expr); \ +\ + return xe_bo_ggtt_addr(bo) + __xe_lrc_##elem##_offset(lrc); \ } \ -DECL_MAP_ADDR_HELPERS(ring) -DECL_MAP_ADDR_HELPERS(pphwsp) -DECL_MAP_ADDR_HELPERS(seqno) -DECL_MAP_ADDR_HELPERS(regs) -DECL_MAP_ADDR_HELPERS(start_seqno) -DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) -DECL_MAP_ADDR_HELPERS(parallel) -DECL_MAP_ADDR_HELPERS(indirect_ring) -DECL_MAP_ADDR_HELPERS(engine_id) +DECL_MAP_ADDR_HELPERS(ring, lrc->bo) +DECL_MAP_ADDR_HELPERS(pphwsp, lrc->bo) +DECL_MAP_ADDR_HELPERS(seqno, lrc->seqno_bo) +DECL_MAP_ADDR_HELPERS(regs, lrc->bo) +DECL_MAP_ADDR_HELPERS(start_seqno, lrc->seqno_bo) +DECL_MAP_ADDR_HELPERS(ctx_job_timestamp, lrc->bo) +DECL_MAP_ADDR_HELPERS(ctx_timestamp, lrc->bo) +DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw, lrc->bo) +DECL_MAP_ADDR_HELPERS(parallel, lrc->bo) +DECL_MAP_ADDR_HELPERS(indirect_ring, lrc->bo) +DECL_MAP_ADDR_HELPERS(engine_id, lrc->bo) #undef DECL_MAP_ADDR_HELPERS @@ -1032,6 +1038,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->seqno_bo); } /* @@ -1431,53 +1438,16 @@ void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_pri lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority)); } -static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, void *replay_state, u32 ring_size, - u16 msix_vec, - u32 init_flags) +static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, + void *replay_state, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; - const u32 lrc_size = 
xe_gt_lrc_size(gt, hwe->class); - u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; u32 arb_enable; - u32 bo_flags; int err; - kref_init(&lrc->refcount); - lrc->gt = gt; - lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class); - lrc->size = lrc_size; - lrc->flags = 0; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - - if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { - lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; - bo_size += LRC_INDIRECT_CTX_BO_SIZE; - } - - if (xe_gt_has_indirect_ring_state(gt)) - lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; - - bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE; - - if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; - - lrc->bo = xe_bo_create_pin_map_novm(xe, tile, - bo_size, - ttm_bo_type_kernel, - bo_flags, false); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); - - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, - hwe->fence_irq, hwe->name); - /* * Init Per-Process of HW status Page, LRC / context state to known * values. 
If there's already a primed default_lrc, just copy it, otherwise @@ -1489,7 +1459,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - lrc_size - LRC_PPHWSP_SIZE); + lrc->size - LRC_PPHWSP_SIZE); if (replay_state) xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, replay_state, lrc->replay_size); @@ -1497,21 +1467,16 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = empty_lrc_data(hwe); if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; + return -ENOMEM; } - xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); + xe_map_memcpy_to(xe, &map, 0, init_data, lrc->size); kfree(init_data); } - if (vm) { + if (vm) xe_lrc_set_ppgtt(lrc, vm); - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, lrc->bo); - } - if (xe_device_has_msix(xe)) { xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, xe_memirq_status_ptr(&tile->memirq, hwe)); @@ -1527,14 +1492,20 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); + + /* Match head and tail pointers */ + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, lrc->ring.tail); xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } else { xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + + /* Match head and tail pointers */ + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, lrc->ring.tail); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, 
RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } @@ -1583,12 +1554,76 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, err = setup_wa_bb(lrc, hwe); if (err) - goto err_lrc_finish; + return err; err = setup_indirect_ctx(lrc, hwe); + + return err; +} + +static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, + void *replay_state, u32 ring_size, u16 msix_vec, u32 init_flags) +{ + struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 bo_flags; + int err; + + kref_init(&lrc->refcount); + lrc->gt = gt; + lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class); + lrc->size = lrc_size; + lrc->flags = 0; + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + + if (xe_gt_has_indirect_ring_state(gt)) + lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + + if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; + + bo = xe_bo_create_pin_map_novm(xe, tile, bo_size, + ttm_bo_type_kernel, + bo_flags, false); + if (IS_ERR(lrc->bo)) + return PTR_ERR(lrc->bo); + + lrc->bo = bo; + + bo = xe_bo_create_pin_map_novm(xe, tile, PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_SYSTEM, false); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto err_lrc_finish; + } + lrc->seqno_bo = bo; + + xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, + hwe->fence_irq, hwe->name); + + err = xe_lrc_ctx_init(lrc, hwe, vm, replay_state, msix_vec, init_flags); if (err) goto err_lrc_finish; + if (vm && 
vm->xef) + xe_drm_client_add_bo(vm->xef->client, lrc->bo); + return 0; err_lrc_finish: @@ -1966,6 +2001,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH(PIPELINE_SELECT); MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); + MATCH3D(3DSTATE_CUSTOM_SAMPLE_PATTERN); MATCH3D(3DSTATE_CLEAR_PARAMS); MATCH3D(3DSTATE_DEPTH_BUFFER); MATCH3D(3DSTATE_STENCIL_BUFFER); @@ -2049,8 +2085,16 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_SBE_MESH); MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); MATCH3D(3DSTATE_COARSE_PIXEL); + MATCH3D(3DSTATE_MESH_SHADER_DATA_EXT); + MATCH3D(3DSTATE_TASK_SHADER_DATA_EXT); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC_2); + MATCH3D(3DSTATE_CC_STATE_POINTERS_2); + MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS_2); + MATCH3D(3DSTATE_BLEND_STATE_POINTERS_2); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_2); MATCH3D(3DSTATE_DRAWING_RECTANGLE); + MATCH3D(3DSTATE_URB_MEMORY); MATCH3D(3DSTATE_CHROMA_KEY); MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); @@ -2070,6 +2114,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); + MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTER_2); default: drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", @@ -2141,6 +2186,102 @@ void xe_lrc_dump_default(struct drm_printer *p, } } +/* + * Lookup the value of a register within the offset/value pairs of an + * MI_LOAD_REGISTER_IMM instruction. + * + * Return -ENOENT if the register is not present in the MI_LRI instruction. 
+ */ +static int lookup_reg_in_mi_lri(u32 offset, u32 *value, + const u32 *dword_pair, int num_regs) +{ + for (int i = 0; i < num_regs; i++) { + if (dword_pair[2 * i] == offset) { + *value = dword_pair[2 * i + 1]; + return 0; + } + } + + return -ENOENT; +} + +/* + * Lookup the value of a register in a specific engine type's default LRC. + * + * Return -EINVAL if the default LRC doesn't exist, or ENOENT if the register + * cannot be found in the default LRC. + */ +int xe_lrc_lookup_default_reg_value(struct xe_gt *gt, + enum xe_engine_class hwe_class, + u32 offset, + u32 *value) +{ + u32 *dw; + int remaining_dw, ret; + + if (!gt->default_lrc[hwe_class]) + return -EINVAL; + + /* + * Skip the beginning of the LRC since it contains the per-process + * hardware status page. + */ + dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; + remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; + + while (remaining_dw > 0) { + u32 num_dw = instr_dw(*dw); + + if (num_dw > remaining_dw) + num_dw = remaining_dw; + + switch (*dw & XE_INSTR_CMD_TYPE) { + case XE_INSTR_MI: + switch (*dw & MI_OPCODE) { + case MI_BATCH_BUFFER_END: + /* End of LRC; register not found */ + return -ENOENT; + + case MI_NOOP: + case MI_TOPOLOGY_FILTER: + /* + * MI_NOOP and MI_TOPOLOGY_FILTER don't have + * a length field and are always 1-dword + * instructions. + */ + remaining_dw--; + dw++; + break; + + case MI_LOAD_REGISTER_IMM: + ret = lookup_reg_in_mi_lri(offset, value, + dw + 1, (num_dw - 1) / 2); + if (ret == 0) + return 0; + + fallthrough; + + default: + /* + * Jump to next instruction based on length + * field. + */ + remaining_dw -= num_dw; + dw += num_dw; + break; + } + break; + + default: + /* Jump to next instruction based on length field. 
*/ + remaining_dw -= num_dw; + dw += num_dw; + } + } + + return -ENOENT; +} + struct instr_state { u32 instr; u16 num_dw; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index c307a3fd9ea2..48f7c26cf129 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -75,7 +75,8 @@ static inline struct xe_lrc *xe_lrc_get(struct xe_lrc *lrc) */ static inline void xe_lrc_put(struct xe_lrc *lrc) { - kref_put(&lrc->refcount, xe_lrc_destroy); + if (lrc) + kref_put(&lrc->refcount, xe_lrc_destroy); } /** @@ -133,6 +134,10 @@ size_t xe_lrc_skip_size(struct xe_device *xe); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); +int xe_lrc_lookup_default_reg_value(struct xe_gt *gt, + enum xe_engine_class hwe_class, + u32 offset, + u32 *value); u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index a4373d280c39..5a718f759ed6 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,6 +22,12 @@ struct xe_lrc { */ struct xe_bo *bo; + /** + * @seqno_bo: Buffer object (memory) for seqno numbers. Always in system + * memory as this a CPU read, GPU write path object. 
+ */ + struct xe_bo *seqno_bo; + /** @size: size of the lrc and optional indirect ring state */ u32 size; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index e58b9b433654..8af6c347bea8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -25,6 +25,7 @@ #include "xe_exec_queue.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" @@ -1148,65 +1149,73 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } + bb = xe_bb_alloc(gt); + if (IS_ERR(bb)) + return PTR_ERR(bb); + bb_pool = ctx->mem.ccs_bb_pool; - guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); - xe_sa_bo_swap_shadow(bb_pool); + scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) { + xe_sa_bo_swap_shadow(bb_pool); - bb = xe_bb_ccs_new(gt, batch_size, read_write); - if (IS_ERR(bb)) { - drm_err(&xe->drm, "BB allocation failed.\n"); - err = PTR_ERR(bb); - return err; + err = xe_bb_init(bb, bb_pool, batch_size); + if (err) { + xe_gt_err(gt, "BB allocation failed.\n"); + xe_bb_free(bb, NULL); + return err; + } + + batch_size_allocated = batch_size; + size = xe_bo_size(src_bo); + batch_size = 0; + + /* + * Emit PTE and copy commands here. + * The CCS copy command can only support limited size. If the size to be + * copied is more than the limit, divide copy into chunks. So, calculate + * sizes here again before copy command is emitted. 
+ */ + + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u32 flush_flags = 0; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / + LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = xe_migrate_res_sizes(m, &src_it); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + batch_size += EMIT_COPY_CCS_DW; + + emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + src_L0_ofs, dst_is_pltt, + src_L0, ccs_ofs, true); + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + + size -= src_L0; + } + + xe_assert(xe, (batch_size_allocated == bb->len)); + src_bo->bb_ccs[read_write] = bb; + + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + xe_sa_bo_sync_shadow(bb->bo); } - batch_size_allocated = batch_size; - size = xe_bo_size(src_bo); - batch_size = 0; - - /* - * Emit PTE and copy commands here. - * The CCS copy command can only support limited size. If the size to be - * copied is more than the limit, divide copy into chunks. So, calculate - * sizes here again before copy command is emitted. 
- */ - while (size) { - batch_size += 10; /* Flush + ggtt addr + 2 NOP */ - u32 flush_flags = 0; - u64 ccs_ofs, ccs_size; - u32 ccs_pt; - - u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - - src_L0 = xe_migrate_res_sizes(m, &src_it); - - batch_size += pte_update_size(m, false, src, &src_it, &src_L0, - &src_L0_ofs, &src_L0_pt, 0, 0, - avail_pts); - - ccs_size = xe_device_ccs_bytes(xe, src_L0); - batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, - &ccs_pt, 0, avail_pts, avail_pts); - xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); - batch_size += EMIT_COPY_CCS_DW; - - emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); - - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); - - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, - src_L0_ofs, dst_is_pltt, - src_L0, ccs_ofs, true); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - - size -= src_L0; - } - - xe_assert(xe, (batch_size_allocated == bb->len)); - src_bo->bb_ccs[read_write] = bb; - - xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_sa_bo_sync_shadow(bb->bo); return 0; } diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 15362789ab99..41ae720acbc3 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -6,7 +6,7 @@ #ifndef _XE_MMIO_H_ #define _XE_MMIO_H_ -#include "xe_gt_types.h" +#include "xe_mmio_types.h" struct xe_device; struct xe_reg; @@ -37,11 +37,6 @@ static inline u32 xe_mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr) return addr; } -static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) -{ - return &xe->tiles[0].mmio; -} - #ifdef CONFIG_PCI_IOV void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid); #endif diff --git a/drivers/gpu/drm/xe/xe_mmio_types.h b/drivers/gpu/drm/xe/xe_mmio_types.h new file mode 100644 index 
000000000000..99e8f269eaf2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_types.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022-2026 Intel Corporation + */ + +#ifndef _XE_MMIO_TYPES_H_ +#define _XE_MMIO_TYPES_H_ + +#include + +struct xe_gt; +struct xe_tile; + +/** + * struct xe_mmio - register mmio structure + * + * Represents an MMIO region that the CPU may use to access registers. A + * region may share its IO map with other regions (e.g., all GTs within a + * tile share the same map with their parent tile, but represent different + * subregions of the overall IO space). + */ +struct xe_mmio { + /** @tile: Backpointer to tile, used for tracing */ + struct xe_tile *tile; + + /** @regs: Map used to access registers. */ + void __iomem *regs; + + /** + * @sriov_vf_gt: Backpointer to GT. + * + * This pointer is only set for GT MMIO regions and only when running + * as an SRIOV VF structure + */ + struct xe_gt *sriov_vf_gt; + + /** + * @regs_size: Length of the register region within the map. + * + * The size of the iomap set in *regs is generally larger than the + * register mmio space since it includes unused regions and/or + * non-register regions such as the GGTT PTEs. + */ + size_t regs_size; + + /** @adj_limit: adjust MMIO address if address is below this value */ + u32 adj_limit; + + /** @adj_offset: offset to add to MMIO address when adjusting */ + u32 adj_offset; +}; + +/** + * struct xe_mmio_range - register range structure + * + * @start: first register offset in the range. + * @end: last register offset in the range. 
+ */ +struct xe_mmio_range { + u32 start; + u32 end; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 54822497c21e..1d19df860bea 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -600,6 +600,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->wb_index = 4; info->unused_entries_index = 4; break; + case XE_NOVALAKE_P: case XE_NOVALAKE_S: case XE_PANTHERLAKE: case XE_LUNARLAKE: diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index a0048f64ed12..4cb578182912 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -10,6 +10,7 @@ #include +#include "xe_defaults.h" #include "xe_device_types.h" #include "xe_drv.h" #include "xe_configfs.h" @@ -19,51 +20,38 @@ #include "xe_observation.h" #include "xe_sched_job.h" -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define DEFAULT_GUC_LOG_LEVEL 3 -#else -#define DEFAULT_GUC_LOG_LEVEL 1 -#endif - -#define DEFAULT_PROBE_DISPLAY true -#define DEFAULT_VRAM_BAR_SIZE 0 -#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE -#define DEFAULT_MAX_VFS ~0 -#define DEFAULT_MAX_VFS_STR "unlimited" -#define DEFAULT_WEDGED_MODE XE_WEDGED_MODE_DEFAULT -#define DEFAULT_WEDGED_MODE_STR XE_WEDGED_MODE_DEFAULT_STR -#define DEFAULT_SVM_NOTIFIER_SIZE 512 - struct xe_modparam xe_modparam = { - .probe_display = DEFAULT_PROBE_DISPLAY, - .guc_log_level = DEFAULT_GUC_LOG_LEVEL, - .force_probe = DEFAULT_FORCE_PROBE, + .probe_display = XE_DEFAULT_PROBE_DISPLAY, + .guc_log_level = XE_DEFAULT_GUC_LOG_LEVEL, + .force_probe = XE_DEFAULT_FORCE_PROBE, #ifdef CONFIG_PCI_IOV - .max_vfs = DEFAULT_MAX_VFS, + .max_vfs = XE_DEFAULT_MAX_VFS, #endif - .wedged_mode = DEFAULT_WEDGED_MODE, - .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, + .wedged_mode = XE_DEFAULT_WEDGED_MODE, + .svm_notifier_size = XE_DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, 
uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " - "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); + "[default=" __stringify(XE_DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " - "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); + "[default=" __stringify(XE_DEFAULT_PROBE_DISPLAY) "])"); +#endif module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " - "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); + "[default=" __stringify(XE_DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " - "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])"); + "[default=" __stringify(XE_DEFAULT_GUC_LOG_LEVEL) "])"); module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); MODULE_PARM_DESC(guc_firmware_path, @@ -80,20 +68,20 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " - "[default=" DEFAULT_FORCE_PROBE "])"); + "[default=" XE_DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); MODULE_PARM_DESC(max_vfs, "Limit number of Virtual Functions (VFs) that could be managed. 
" "(0=no VFs; N=allow up to N VFs " - "[default=" DEFAULT_MAX_VFS_STR "])"); + "[default=" XE_DEFAULT_MAX_VFS_STR "])"); #endif module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, uint, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode (0=never, 1=upon-critical-error, 2=upon-any-hang-no-reset " - "[default=" DEFAULT_WEDGED_MODE_STR "])"); + "[default=" XE_DEFAULT_WEDGED_MODE_STR "])"); static int xe_check_nomodeset(void) { diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 9c4ccd3b39d4..33487e91f366 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -6,7 +6,7 @@ #include #include -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_mmio.h" #include "xe_nvm.h" #include "xe_pcode_api.h" @@ -133,12 +133,10 @@ int xe_nvm_init(struct xe_device *xe) if (WARN_ON(xe->nvm)) return -EFAULT; - xe->nvm = kzalloc_obj(*nvm); - if (!xe->nvm) + nvm = kzalloc_obj(*nvm); + if (!nvm) return -ENOMEM; - nvm = xe->nvm; - nvm->writable_override = xe_nvm_writable_override(xe); nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); nvm->bar.parent = &pdev->resource[0]; @@ -165,7 +163,6 @@ int xe_nvm_init(struct xe_device *xe) if (ret) { drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); kfree(nvm); - xe->nvm = NULL; return ret; } @@ -173,8 +170,9 @@ int xe_nvm_init(struct xe_device *xe) if (ret) { drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); auxiliary_device_uninit(aux_dev); - xe->nvm = NULL; return ret; } + + xe->nvm = nvm; return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4dd3f29933cf..dcd393b0931a 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -29,7 +29,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_printk.h" -#include "xe_guc_pc.h" +#include "xe_guc_rc.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" @@ -873,10 
+873,6 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); - /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); - xe_oa_free_configs(stream); xe_file_put(stream->xef); } @@ -969,7 +965,7 @@ static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); - queue_delayed_work(system_unbound_wq, &ofence->work, + queue_delayed_work(system_dfl_wq, &ofence->work, usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); dma_fence_put(fence); } @@ -1760,19 +1756,6 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, goto exit; } - /* - * GuC reset of engines causes OA to lose configuration - * state. Prevent this by overriding GUCRC mode. - */ - if (XE_GT_WA(stream->gt, 1509372804)) { - ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, - SLPC_GUCRC_MODE_GUCRC_NO_RC6); - if (ret) - goto err_free_configs; - - stream->override_gucrc = true; - } - /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -1823,9 +1806,6 @@ err_free_oa_buf: err_fw_put: xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); -err_free_configs: xe_oa_free_configs(stream); exit: xe_file_put(stream->xef); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 08cc8d7c2215..b03ffd513483 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -239,9 +239,6 @@ struct xe_oa_stream { /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ u64 
poll_period_ns; - /** @override_gucrc: GuC RC has been overridden for the OA stream */ - bool override_gucrc; - /** @oa_status: temporary storage for oa_status register value */ u32 oa_status; diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index 6bee53d6ffc3..ea4857acf28d 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -136,7 +136,7 @@ unlock_dma_resv: static bool xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type) { - return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; + return (access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK) == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; } static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid) @@ -164,7 +164,7 @@ static int xe_pagefault_service(struct xe_pagefault *pf) bool atomic; /* Producer flagged this fault to be nacked */ - if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK) + if (pf->consumer.fault_type_level == XE_PAGEFAULT_TYPE_LEVEL_NACK) return -EFAULT; vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); @@ -225,17 +225,20 @@ static void xe_pagefault_print(struct xe_pagefault *pf) { xe_gt_info(pf->gt, "\n\tASID: %d\n" "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: %d\n" - "\tFaultLevel: %d\n" + "\tFaultType: %lu\n" + "\tAccessType: %lu\n" + "\tFaultLevel: %lu\n" "\tEngineClass: %d %s\n" "\tEngineInstance: %d\n", pf->consumer.asid, upper_32_bits(pf->consumer.page_addr), lower_32_bits(pf->consumer.page_addr), - pf->consumer.fault_type, - pf->consumer.access_type, - pf->consumer.fault_level, + FIELD_GET(XE_PAGEFAULT_TYPE_MASK, + pf->consumer.fault_type_level), + FIELD_GET(XE_PAGEFAULT_ACCESS_TYPE_MASK, + pf->consumer.access_type), + FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, + pf->consumer.fault_type_level), pf->consumer.engine_class, xe_hw_engine_class_to_str(pf->consumer.engine_class), pf->consumer.engine_instance); @@ -259,9 +262,15 @@ static void xe_pagefault_queue_work(struct 
work_struct *w) err = xe_pagefault_service(&pf); if (err) { - xe_pagefault_print(&pf); - xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n", - ERR_PTR(err)); + if (!(pf.consumer.access_type & XE_PAGEFAULT_ACCESS_PREFETCH)) { + xe_pagefault_print(&pf); + xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } else { + xe_gt_stats_incr(pf.gt, XE_GT_STATS_ID_INVALID_PREFETCH_PAGEFAULT_COUNT, 1); + xe_gt_dbg(pf.gt, "Prefetch Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } } pf.producer.ops->ack_fault(&pf, err); diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h index d3b516407d60..b3289219b1be 100644 --- a/drivers/gpu/drm/xe/xe_pagefault_types.h +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -68,24 +68,26 @@ struct xe_pagefault { /** @consumer.asid: address space ID */ u32 asid; /** - * @consumer.access_type: access type, u8 rather than enum to - * keep size compact + * @consumer.access_type: access type and prefetch flag packed + * into a u8. */ u8 access_type; +#define XE_PAGEFAULT_ACCESS_TYPE_MASK GENMASK(1, 0) +#define XE_PAGEFAULT_ACCESS_PREFETCH BIT(7) /** - * @consumer.fault_type: fault type, u8 rather than enum to - * keep size compact + * @consumer.fault_type_level: fault type and level, u8 rather + * than enum to keep size compact */ - u8 fault_type; -#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */ - /** @consumer.fault_level: fault level */ - u8 fault_level; + u8 fault_type_level; +#define XE_PAGEFAULT_TYPE_LEVEL_NACK 0xff /* Producer indicates nack fault */ +#define XE_PAGEFAULT_LEVEL_MASK GENMASK(3, 0) +#define XE_PAGEFAULT_TYPE_MASK GENMASK(7, 4) /** @consumer.engine_class: engine class */ u8 engine_class; /** @consumer.engine_instance: engine instance */ u8 engine_instance; /** consumer.reserved: reserved bits for future expansion */ - u8 reserved[7]; + u64 reserved; } consumer; /** * @producer: State for the producer (i.e., HW/FW interface). 
Populated diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 14d0dce5190a..f840d9a58740 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -88,6 +88,7 @@ struct xe_pat_ops { void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries); int (*dump)(struct xe_gt *gt, struct drm_printer *p); + void (*entry_dump)(struct drm_printer *p, const char *label, u32 pat, bool rsvd); }; static const struct xe_pat_table_entry xelp_pat_table[] = { @@ -123,7 +124,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * - no_promote: 0=promotable, 1=no promote * - comp_en: 0=disable, 1=enable * - l3clos: L3 class of service (0-3) - * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), + * 2=XA ("WB - Transient App" for Xe3p), 3=UC * - l4_policy: 0=WB, 1=WT, 3=UC * - coh_mode: 0=no snoop, 2=1-way coherent, 3=2-way coherent * @@ -252,6 +254,44 @@ static const struct xe_pat_table_entry xe3p_xpc_pat_table[] = { [31] = XE3P_XPC_PAT( 0, 3, 0, 0, 3 ), }; +static const struct xe_pat_table_entry xe3p_primary_pat_pta = XE2_PAT(0, 0, 0, 0, 0, 3); +static const struct xe_pat_table_entry xe3p_media_pat_pta = XE2_PAT(0, 0, 0, 0, 0, 2); + +static const struct xe_pat_table_entry xe3p_lpg_pat_table[] = { + [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ), + [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ), + [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ), + [ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ), + [ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ), + [ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ), + [ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ), + [ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ), + [ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ), + [ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ), + [10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ), + [11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ), + [12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ), + [13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ), + [14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ), + [15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ), + [16] = XE2_PAT( 0, 1, 0, 0, 3, 2 ), + 
/* 17 is reserved; leave set to all 0's */ + [18] = XE2_PAT( 1, 0, 0, 2, 3, 0 ), + [19] = XE2_PAT( 1, 0, 0, 2, 3, 2 ), + [20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ), + [21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ), + [22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ), + [23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ), + [24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ), + [25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ), + [26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ), + [27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ), + [28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ), + [29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ), + [30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ), + [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ), +}; + u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) { WARN_ON(pat_index >= xe->pat.n_entries); @@ -284,8 +324,10 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[ if (xe->pat.pat_ats) xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value); - if (xe->pat.pat_pta) - xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value); + if (xe->pat.pat_primary_pta && xe_gt_is_main_type(gt)) + xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_primary_pta->value); + if (xe->pat.pat_media_pta && xe_gt_is_media_type(gt)) + xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_media_pta->value); } static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], @@ -301,8 +343,10 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta if (xe->pat.pat_ats) xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value); - if (xe->pat.pat_pta) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); + if (xe->pat.pat_primary_pta && xe_gt_is_main_type(gt)) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_primary_pta->value); + if (xe->pat.pat_media_pta && xe_gt_is_media_type(gt)) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_media_pta->value); } static int xelp_dump(struct xe_gt *gt, struct drm_printer *p) @@ -458,7 +502,7 
@@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); xe_pat_index_label(label, sizeof(label), i); - xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); + xe->pat.ops->entry_dump(p, label, pat, !xe->pat.table[i].valid); } /* @@ -471,7 +515,7 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); drm_printf(p, "Page Table Access:\n"); - xe2_pat_entry_dump(p, "PTA_MODE", pat, false); + xe->pat.ops->entry_dump(p, "PTA_MODE", pat, false); return 0; } @@ -480,44 +524,14 @@ static const struct xe_pat_ops xe2_pat_ops = { .program_graphics = program_pat_mcr, .program_media = program_pat, .dump = xe2_dump, + .entry_dump = xe2_pat_entry_dump, }; -static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - u32 pat; - int i; - char label[PAT_LABEL_LEN]; - - CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref.domains) - return -ETIMEDOUT; - - drm_printf(p, "PAT table: (* = reserved entry)\n"); - - for (i = 0; i < xe->pat.n_entries; i++) { - pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - - xe_pat_index_label(label, sizeof(label), i); - xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); - } - - /* - * Also print PTA_MODE, which describes how the hardware accesses - * PPGTT entries. 
- */ - pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); - - drm_printf(p, "Page Table Access:\n"); - xe3p_xpc_pat_entry_dump(p, "PTA_MODE", pat, false); - - return 0; -} - static const struct xe_pat_ops xe3p_xpc_pat_ops = { .program_graphics = program_pat_mcr, .program_media = program_pat, - .dump = xe3p_xpc_dump, + .dump = xe2_dump, + .entry_dump = xe3p_xpc_pat_entry_dump, }; void xe_pat_init_early(struct xe_device *xe) @@ -527,11 +541,26 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.ops = &xe3p_xpc_pat_ops; xe->pat.table = xe3p_xpc_pat_table; xe->pat.pat_ats = &xe3p_xpc_pat_ats; - xe->pat.pat_pta = &xe3p_xpc_pat_pta; + xe->pat.pat_primary_pta = &xe3p_xpc_pat_pta; + xe->pat.pat_media_pta = &xe3p_xpc_pat_pta; xe->pat.n_entries = ARRAY_SIZE(xe3p_xpc_pat_table); xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 3; /* N/A (no display); use UC */ xe->pat.idx[XE_CACHE_WB] = 2; + } else if (GRAPHICS_VER(xe) == 35) { + xe->pat.ops = &xe2_pat_ops; + xe->pat.table = xe3p_lpg_pat_table; + xe->pat.pat_ats = &xe2_pat_ats; + if (!IS_DGFX(xe)) { + xe->pat.pat_primary_pta = &xe3p_primary_pat_pta; + xe->pat.pat_media_pta = &xe3p_media_pat_pta; + } + xe->pat.n_entries = ARRAY_SIZE(xe3p_lpg_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 15; + xe->pat.idx[XE_CACHE_WB] = 2; + xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; + xe->pat.idx[XE_CACHE_WB_COMPRESSION] = 16; } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; if (GRAPHICS_VER(xe) == 30) { @@ -541,8 +570,10 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.table = xe2_pat_table; } xe->pat.pat_ats = &xe2_pat_ats; - if (IS_DGFX(xe)) - xe->pat.pat_pta = &xe2_pat_pta; + if (IS_DGFX(xe)) { + xe->pat.pat_primary_pta = &xe2_pat_pta; + xe->pat.pat_media_pta = &xe2_pat_pta; + } /* Wa_16023588340. 
XXX: Should use XE_WA */ if (GRAPHICS_VERx100(xe) == 2001) @@ -600,20 +631,17 @@ void xe_pat_init_early(struct xe_device *xe) GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); } - /* VFs can't program nor dump PAT settings */ - if (IS_SRIOV_VF(xe)) - xe->pat.ops = NULL; - - xe_assert(xe, !xe->pat.ops || xe->pat.ops->dump); - xe_assert(xe, !xe->pat.ops || xe->pat.ops->program_graphics); - xe_assert(xe, !xe->pat.ops || MEDIA_VER(xe) < 13 || xe->pat.ops->program_media); + xe_assert(xe, xe->pat.ops->dump); + xe_assert(xe, xe->pat.ops->program_graphics); + xe_assert(xe, MEDIA_VER(xe) < 13 || xe->pat.ops->program_media); + xe_assert(xe, GRAPHICS_VER(xe) < 20 || xe->pat.ops->entry_dump); } void xe_pat_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (!xe->pat.ops) + if (IS_SRIOV_VF(xe)) return; if (xe_gt_is_media_type(gt)) @@ -633,7 +661,7 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - if (!xe->pat.ops) + if (IS_SRIOV_VF(xe)) return -EOPNOTSUPP; return xe->pat.ops->dump(gt, p); @@ -649,6 +677,8 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); + const struct xe_pat_table_entry *pta_entry = xe_gt_is_main_type(gt) ? 
+ xe->pat.pat_primary_pta : xe->pat.pat_media_pta; char label[PAT_LABEL_LEN]; if (!xe->pat.table || !xe->pat.n_entries) @@ -658,12 +688,9 @@ int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) for (u32 i = 0; i < xe->pat.n_entries; i++) { u32 pat = xe->pat.table[i].value; - if (GRAPHICS_VERx100(xe) == 3511) { + if (GRAPHICS_VER(xe) >= 20) { xe_pat_index_label(label, sizeof(label), i); - xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); - } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { - xe_pat_index_label(label, sizeof(label), i); - xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); + xe->pat.ops->entry_dump(p, label, pat, !xe->pat.table[i].valid); } else if (xe->info.platform == XE_METEORLAKE) { xelpg_pat_entry_dump(p, i, pat); } else if (xe->info.platform == XE_PVC) { @@ -675,18 +702,18 @@ int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) } } - if (xe->pat.pat_pta) { - u32 pat = xe->pat.pat_pta->value; + if (pta_entry) { + u32 pat = pta_entry->value; drm_printf(p, "Page Table Access:\n"); - xe2_pat_entry_dump(p, "PTA_MODE", pat, false); + xe->pat.ops->entry_dump(p, "PTA_MODE", pat, false); } if (xe->pat.pat_ats) { u32 pat = xe->pat.pat_ats->value; drm_printf(p, "PCIe ATS/PASID:\n"); - xe2_pat_entry_dump(p, "PAT_ATS ", pat, false); + xe->pat.ops->entry_dump(p, "PAT_ATS ", pat, false); } drm_printf(p, "Cache Level:\n"); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 5c0b3224f20d..b48e84549888 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -52,6 +52,7 @@ __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), + .num_geometry_xecore_fuse_regs = 1, }; #define XE_HP_FEATURES \ @@ -62,6 +63,8 @@ static const struct xe_graphics_desc graphics_xehpg = { BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | 
BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), + .num_geometry_xecore_fuse_regs = 1, + .num_compute_xecore_fuse_regs = 1, XE_HP_FEATURES, }; @@ -81,12 +84,15 @@ static const struct xe_graphics_desc graphics_xehpc = { .has_asid = 1, .has_atomic_enable_pte_bit = 1, .has_usm = 1, + .num_compute_xecore_fuse_regs = 2, }; static const struct xe_graphics_desc graphics_xelpg = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_CCS0), + .num_geometry_xecore_fuse_regs = 1, + .num_compute_xecore_fuse_regs = 1, XE_HP_FEATURES, }; @@ -104,6 +110,15 @@ static const struct xe_graphics_desc graphics_xelpg = { static const struct xe_graphics_desc graphics_xe2 = { XE2_GFX_FEATURES, + .num_geometry_xecore_fuse_regs = 3, + .num_compute_xecore_fuse_regs = 3, +}; + +static const struct xe_graphics_desc graphics_xe3p_lpg = { + XE2_GFX_FEATURES, + .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), + .num_geometry_xecore_fuse_regs = 3, + .num_compute_xecore_fuse_regs = 3, }; static const struct xe_graphics_desc graphics_xe3p_xpc = { @@ -112,6 +127,10 @@ static const struct xe_graphics_desc graphics_xe3p_xpc = { .hw_engine_mask = GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) | GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), + .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | + BIT(XE_ENGINE_CLASS_COMPUTE), + .num_geometry_xecore_fuse_regs = 4, + .num_compute_xecore_fuse_regs = 4, }; static const struct xe_media_desc media_xem = { @@ -146,6 +165,7 @@ static const struct xe_ip graphics_ips[] = { { 3003, "Xe3_LPG", &graphics_xe2 }, { 3004, "Xe3_LPG", &graphics_xe2 }, { 3005, "Xe3_LPG", &graphics_xe2 }, + { 3510, "Xe3p_LPG", &graphics_xe3p_lpg }, { 3511, "Xe3p_XPC", &graphics_xe3p_xpc }, }; @@ -164,6 +184,10 @@ static const struct xe_ip media_ips[] = { { 3503, "Xe3p_HPM", &media_xelpmp }, }; +#define MULTI_LRC_MASK \ + .multi_lrc_mask = 
BIT(XE_ENGINE_CLASS_VIDEO_DECODE) | \ + BIT(XE_ENGINE_CLASS_VIDEO_ENHANCE) + static const struct xe_device_desc tgl_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xelp, .pre_gmdid_media_ip = &media_ip_xem, @@ -174,6 +198,7 @@ static const struct xe_device_desc tgl_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -188,6 +213,7 @@ static const struct xe_device_desc rkl_desc = { .has_display = true, .has_llc = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -205,6 +231,7 @@ static const struct xe_device_desc adl_s_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -226,6 +253,7 @@ static const struct xe_device_desc adl_p_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -245,6 +273,7 @@ static const struct xe_device_desc adl_n_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -263,6 +292,7 @@ static const struct xe_device_desc dg1_desc = { .has_gsc_nvm = 1, .has_heci_gscfi = 1, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -293,6 +323,7 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, DG2_FEATURES, @@ -305,6 +336,7 @@ static const struct xe_device_desc dg2_desc = { .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, .max_gt_per_tile = 1, + 
MULTI_LRC_MASK, .require_force_probe = true, DG2_FEATURES, @@ -323,6 +355,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .has_heci_gscfi = 1, .max_gt_per_tile = 1, .max_remote_tiles = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 57, .vm_max_level = 4, @@ -338,6 +371,7 @@ static const struct xe_device_desc mtl_desc = { .has_display = true, .has_pxp = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .va_bits = 48, .vm_max_level = 3, }; @@ -349,6 +383,7 @@ static const struct xe_device_desc lnl_desc = { .has_flat_ccs = 1, .has_pxp = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .needs_scratch = true, .va_bits = 48, .vm_max_level = 4, @@ -373,6 +408,7 @@ static const struct xe_device_desc bmg_desc = { .has_soc_remapper_telem = true, .has_sriov = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .needs_scratch = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_BATTLEMAGE_G21, "G21", bmg_g21_ids }, @@ -391,6 +427,7 @@ static const struct xe_device_desc ptl_desc = { .has_pre_prod_wa = 1, .has_pxp = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .needs_scratch = true, .needs_shared_vf_gt_wq = true, .va_bits = 48, @@ -404,6 +441,7 @@ static const struct xe_device_desc nvls_desc = { .has_flat_ccs = 1, .has_pre_prod_wa = 1, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 4, @@ -425,11 +463,27 @@ static const struct xe_device_desc cri_desc = { .has_soc_remapper_telem = true, .has_sriov = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 57, .vm_max_level = 4, }; +static const struct xe_device_desc nvlp_desc = { + PLATFORM(NOVALAKE_P), + .dma_mask_size = 46, + .has_cached_pt = true, + .has_display = true, + .has_flat_ccs = 1, + .has_page_reclaim_hw_assist = true, + .has_pre_prod_wa = true, + .max_gt_per_tile = 2, + MULTI_LRC_MASK, + .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 4, +}; + #undef PLATFORM __diag_pop(); 
@@ -459,6 +513,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_WCL_IDS(INTEL_VGA_DEVICE, &ptl_desc), INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc), INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc), + INTEL_NVLP_IDS(INTEL_VGA_DEVICE, &nvlp_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); @@ -710,6 +765,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_pcode = desc->skip_pcode; xe->info.needs_scratch = desc->needs_scratch; xe->info.needs_shared_vf_gt_wq = desc->needs_shared_vf_gt_wq; + xe->info.multi_lrc_mask = desc->multi_lrc_mask; xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && @@ -786,6 +842,8 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.multi_queue_engine_class_mask = graphics_desc->multi_queue_engine_class_mask; gt->info.engine_mask = graphics_desc->hw_engine_mask; + gt->info.num_geometry_xecore_fuse_regs = graphics_desc->num_geometry_xecore_fuse_regs; + gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs; /* * Before media version 13, the media IP was part of the primary GT @@ -892,6 +950,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_device_atomics_on_smem = 1; xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; + xe->info.has_ctx_tlb_inval = graphics_desc->has_ctx_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; xe->info.has_mem_copy_instr = GRAPHICS_VER(xe) >= 20; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index c5fe9b1836d2..47e8a1552c2b 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -30,6 +30,7 @@ struct xe_device_desc { u8 dma_mask_size; u8 max_remote_tiles:2; u8 max_gt_per_tile:2; + u8 multi_lrc_mask; u8 va_bits; u8 vm_max_level; u8 vram_flags; @@ -66,11 +67,14 @@ 
struct xe_device_desc { struct xe_graphics_desc { u64 hw_engine_mask; /* hardware engines provided by graphics IP */ u16 multi_queue_engine_class_mask; /* bitmask of engine classes which support multi queue */ + u8 num_geometry_xecore_fuse_regs; + u8 num_compute_xecore_fuse_regs; u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; u8 has_indirect_ring_state:1; u8 has_range_tlb_inval:1; + u8 has_ctx_tlb_inval:1; u8 has_usm:1; u8 has_64bit_timestamp:1; }; diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index f516dbddfd88..6cff385227ea 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -26,6 +26,7 @@ enum xe_platform { XE_PANTHERLAKE, XE_NOVALAKE_S, XE_CRESCENTISLAND, + XE_NOVALAKE_P, }; enum xe_subplatform { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index f96f2844c5ba..34db266b723f 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -142,9 +142,6 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= xe->info.max_gt_per_tile) - return -EINVAL; - gt = xe_device_get_gt(xe, eci->gt_id); if (!gt) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 2e5c78940b41..2df0277efb2f 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -13,6 +13,7 @@ #include #include +#include "xe_assert.h" #include "xe_device.h" #include "xe_device_types.h" #include "xe_force_wake.h" @@ -20,6 +21,7 @@ #include "xe_gt_printk.h" #include "xe_gt_types.h" #include "xe_hw_engine_types.h" +#include "xe_lrc.h" #include "xe_mmio.h" #include "xe_rtp_types.h" @@ -98,10 +100,12 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, *pentry = *e; ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); if (ret) - goto fail; + goto fail_free; return 0; +fail_free: + kfree(pentry); fail: xe_gt_err(gt, "discarding save-restore reg %04lx (clear: %08x, 
set: %08x, masked: %s, mcr: %s): ret=%d\n", @@ -169,8 +173,11 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) if (xa_empty(&sr->xa)) return; - if (IS_SRIOV_VF(gt_to_xe(gt))) - return; + /* + * We don't process non-LRC reg_sr lists in VF, so they should have + * been empty in the check above. + */ + xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt))); xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); @@ -204,3 +211,66 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p) str_yes_no(entry->reg.masked), str_yes_no(entry->reg.mcr)); } + +static u32 readback_reg(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_reg_mcr mcr_reg = to_xe_reg_mcr(reg); + + if (reg.mcr) + return xe_gt_mcr_unicast_read_any(gt, mcr_reg); + else + return xe_mmio_read32(&gt->mmio, reg); +} + +/** + * xe_reg_sr_readback_check() - Readback registers referenced in save/restore + * entries and check whether the programming is in place. + * @sr: Save/restore entries + * @gt: GT to read register from + * @p: DRM printer to report discrepancies on + */ +void xe_reg_sr_readback_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long offset; + + xa_for_each(&sr->xa, offset, entry) { + u32 val = readback_reg(gt, entry->reg); + u32 mask = entry->clr_bits | entry->set_bits; + + if ((val & mask) != entry->set_bits) + drm_printf(p, "%#8lx & %#10x :: expected %#10x got %#10x\n", + offset, mask, entry->set_bits, val & mask); + } +} + +/** + * xe_reg_sr_lrc_check() - Check LRC for registers referenced in save/restore + * entries and check whether the programming is in place. 
+ * @sr: Save/restore entries + * @gt: GT to read register from + * @hwe: Hardware engine type to check LRC for + * @p: DRM printer to report discrepancies on + */ +void xe_reg_sr_lrc_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct xe_hw_engine *hwe, + struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long offset; + + xa_for_each(&sr->xa, offset, entry) { + u32 val; + int ret = xe_lrc_lookup_default_reg_value(gt, hwe->class, offset, &val); + u32 mask = entry->clr_bits | entry->set_bits; + + if (ret == -ENOENT) + drm_printf(p, "%#8lx :: not found in LRC for %s\n", offset, hwe->name); + else if ((val & mask) != entry->set_bits) + drm_printf(p, "%#8lx & %#10x :: expected %#10x got %#10x\n", + offset, mask, entry->set_bits, val & mask); + } +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index 51fbba423e27..1ec6e8ecf278 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -19,6 +19,13 @@ struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); +void xe_reg_sr_readback_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct drm_printer *p); +void xe_reg_sr_lrc_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct xe_hw_engine *hwe, + struct drm_printer *p); int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e, struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 1d36c09681aa..80577e4b7437 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -75,7 +75,15 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) }, { XE_RTP_NAME("14024997852"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), 
ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(FF_MODE, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(VFLSKPD, + RING_FORCE_TO_NONPRIV_ACCESS_RW)) + }, + { XE_RTP_NAME("14024997852"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(FF_MODE, RING_FORCE_TO_NONPRIV_ACCESS_RW), WHITELIST(VFLSKPD, @@ -181,7 +189,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist), - &hwe->reg_whitelist); + &hwe->reg_whitelist, false); whitelist_apply_to_hwe(hwe); } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 248620b0901d..53d420d72164 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -280,6 +280,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -345,6 +348,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -397,6 +403,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + i = emit_render_cache_flush(job, dw, i); if (job->user_fence.used) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index b7c26e2fb411..7bfdc6795ce6 100644 --- 
a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -270,6 +270,8 @@ static void rtp_mark_active(struct xe_device *xe, * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. The bits for each * register set are expected not to collide with previously added entries + * @process_in_vf: Whether this RTP table should get processed for SR-IOV VF + * devices. Should generally only be 'true' for LRC tables. * * Walk the table pointed by @entries (with an empty sentinel) and add all * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is @@ -278,7 +280,8 @@ static void rtp_mark_active(struct xe_device *xe, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, size_t n_entries, - struct xe_reg_sr *sr) + struct xe_reg_sr *sr, + bool process_in_vf) { const struct xe_rtp_entry_sr *entry; struct xe_hw_engine *hwe = NULL; @@ -287,6 +290,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, >, &xe); + if (!process_in_vf && IS_SRIOV_VF(xe)) + return; + xe_assert(xe, entries); for (entry = entries; entry - entries < n_entries; entry++) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index ba5f940c0a96..be4195264286 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -431,7 +431,8 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, - size_t n_entries, struct xe_reg_sr *sr); + size_t n_entries, struct xe_reg_sr *sr, + bool process_in_vf); void xe_rtp_process(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry *entries); diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index b738102575d4..f32045f40b7a 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -89,6 +89,12 @@ 
struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, if (ret) return ERR_PTR(ret); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&sa_manager->swap_guard); + fs_reclaim_release(GFP_KERNEL); + } + shadow = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | @@ -175,6 +181,36 @@ struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); } +/** + * xe_sa_bo_alloc() - Allocate uninitialized suballoc object. + * @gfp: gfp flags used for memory allocation. + * + * Allocate memory for an uninitialized suballoc object. Intended usage is + * allocate memory for suballoc object outside of a reclaim tainted context + * and then be initialized at a later time in a reclaim tainted context. + * + * Return: a new uninitialized suballoc object, or an ERR_PTR(-ENOMEM). + */ +struct drm_suballoc *xe_sa_bo_alloc(gfp_t gfp) +{ + return drm_suballoc_alloc(gfp); +} + +/** + * xe_sa_bo_init() - Initialize a suballocation. + * @sa_manager: pointer to the sa_manager + * @sa: The struct drm_suballoc. + * @size: number of bytes we want to suballocate. + * + * Try to make a suballocation on a pre-allocated suballoc object of size @size. + * + * Return: zero on success, errno on failure. + */ +int xe_sa_bo_init(struct xe_sa_manager *sa_manager, struct drm_suballoc *sa, size_t size) +{ + return drm_suballoc_insert(&sa_manager->base, sa, size, true, 0); +} + /** * xe_sa_bo_flush_write() - Copy the data from the sub-allocation to the GPU memory. 
* @sa_bo: the &drm_suballoc to flush diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 05e9a4e00e78..50218b0d1404 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -38,6 +38,8 @@ static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager return __xe_sa_bo_new(sa_manager, size, GFP_KERNEL); } +struct drm_suballoc *xe_sa_bo_alloc(gfp_t gfp); +int xe_sa_bo_init(struct xe_sa_manager *sa_manager, struct drm_suballoc *sa, size_t size); void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo); void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); diff --git a/drivers/gpu/drm/xe/xe_sleep.h b/drivers/gpu/drm/xe/xe_sleep.h new file mode 100644 index 000000000000..2bc3f4c0ee0b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sleep.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_SLEEP_H_ +#define _XE_SLEEP_H_ + +#include <linux/delay.h> +#include <linux/math64.h> + +/** + * xe_sleep_relaxed_ms() - Sleep for an approximate time. + * @delay_ms: time in msec to sleep + * + * For smaller timeouts, sleep with 0.5ms accuracy. + */ +static inline void xe_sleep_relaxed_ms(unsigned int delay_ms) +{ + unsigned long min_us, max_us; + + if (!delay_ms) + return; + + if (delay_ms > 20) { + msleep(delay_ms); + return; + } + + min_us = mul_u32_u32(delay_ms, 1000); + max_us = min_us + 500; + + usleep_range(min_us, max_us); } + +/** + * xe_sleep_exponential_ms() - Sleep for an exponentially increased time. + * @sleep_period_ms: current time in msec to sleep + * @max_sleep_ms: maximum time in msec to sleep + * + * Sleep for the @sleep_period_ms and exponentially increase this time for the + * next loop, unless reaching the @max_sleep_ms limit. + * + * Return: approximate time in msec the task was delayed. 
+ */ +static inline unsigned int xe_sleep_exponential_ms(unsigned int *sleep_period_ms, + unsigned int max_sleep_ms) +{ + unsigned int delay_ms = *sleep_period_ms; + unsigned int next_delay_ms = 2 * delay_ms; + + xe_sleep_relaxed_ms(delay_ms); + *sleep_period_ms = min(next_delay_ms, max_sleep_ms); + return delay_ms; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_soc_remapper.c b/drivers/gpu/drm/xe/xe_soc_remapper.c index 1c391d719196..c031336a6d75 100644 --- a/drivers/gpu/drm/xe/xe_soc_remapper.c +++ b/drivers/gpu/drm/xe/xe_soc_remapper.c @@ -4,6 +4,7 @@ */ #include "regs/xe_soc_remapper_regs.h" +#include "xe_device.h" #include "xe_mmio.h" #include "xe_soc_remapper.h" diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index ea411944609b..f3835867fce5 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -120,7 +120,7 @@ int xe_sriov_init(struct xe_device *xe) xe_sriov_vf_init_early(xe); xe_assert(xe, !xe->sriov.wq); - xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); + xe->sriov.wq = alloc_workqueue("xe-sriov-wq", WQ_PERCPU, 0); if (!xe->sriov.wq) return -ENOMEM; diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 6db45df55615..72e55543c30e 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -28,7 +28,8 @@ static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe static inline bool xe_device_is_sriov_pf(const struct xe_device *xe) { - return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; + return IS_ENABLED(CONFIG_PCI_IOV) && + xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; } static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) @@ -36,11 +37,7 @@ static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; } -#ifdef CONFIG_PCI_IOV #define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe) -#else -#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && 
false) -#endif #define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe) #define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe)) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 6ce3c58e003c..47a6e0fd66e0 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -20,11 +20,14 @@ #include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" +static bool wanted_admin_only(struct xe_device *xe) +{ + return xe_configfs_admin_only_pf(to_pci_dev(xe->drm.dev)); +} + static unsigned int wanted_max_vfs(struct xe_device *xe) { - if (IS_ENABLED(CONFIG_CONFIGFS_FS)) - return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); - return xe_modparam.max_vfs; + return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); } static int pf_reduce_totalvfs(struct xe_device *xe, int limit) @@ -76,6 +79,7 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) pf_reduce_totalvfs(xe, newlimit); + xe->sriov.pf.admin_only = wanted_admin_only(xe); xe->sriov.pf.device_total_vfs = totalvfs; xe->sriov.pf.driver_max_vfs = newlimit; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index 9054fdc34597..0fcc6cec4afc 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -56,7 +56,8 @@ static inline unsigned int xe_sriov_pf_num_vfs(const struct xe_device *xe) */ static inline bool xe_sriov_pf_admin_only(const struct xe_device *xe) { - return !xe->info.probe_display; + xe_assert(xe, IS_SRIOV_PF(xe)); + return xe->sriov.pf.admin_only; } static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 01470c42e8a7..abe3677d33ed 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_policy.h" 
+#include "xe_lmtt.h" #include "xe_sriov.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" @@ -32,17 +33,6 @@ static bool pf_auto_provisioning_mode(struct xe_device *xe) return xe->sriov.pf.provision.mode == XE_SRIOV_PROVISIONING_MODE_AUTO; } -static bool pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) -{ - unsigned int n; - - for (n = 1; n <= num_vfs; n++) - if (!xe_gt_sriov_pf_config_is_empty(gt, n)) - return false; - - return true; -} - static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) { struct xe_gt *gt; @@ -51,8 +41,6 @@ static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) int err; for_each_gt(gt, xe, id) { - if (!pf_needs_provisioning(gt, num_vfs)) - return -EUCLEAN; err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); result = result ?: err; } @@ -436,3 +424,108 @@ int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int v return !count ? -ENODATA : 0; } + +static u64 vram_per_tile(struct xe_tile *tile, u64 total) +{ + struct xe_device *xe = tile->xe; + unsigned int tcount = xe->info.tile_count; + u64 alignment = xe_lmtt_page_size(&tile->sriov.pf.lmtt); + + total = round_up(total, tcount * alignment); + return div_u64(total, tcount); +} + +/** + * xe_sriov_pf_provision_bulk_apply_vram() - Change VRAM provisioning for all VFs. + * @xe: the PF &xe_device + * @size: the VRAM size in [bytes] to set + * + * Change all VFs VRAM (LMEM) provisioning on all tiles. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_bulk_apply_vram(struct xe_device *xe, u64 size) +{ + unsigned int num_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_tile *tile; + unsigned int id; + int result = 0; + int err; + + xe_assert(xe, xe_device_has_lmtt(xe)); + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_tile(tile, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_lmem_locked(tile->primary_gt, + VFID(1), num_vfs, + vram_per_tile(tile, size)); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_vram() - Change single VF VRAM allocation. + * @xe: the PF &xe_device + * @vfid: the VF identifier (can't be 0 == PFID) + * @size: VRAM size to set + * + * Change VF's VRAM provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_apply_vf_vram(struct xe_device *xe, unsigned int vfid, u64 size) +{ + struct xe_tile *tile; + unsigned int id; + int result = 0; + int err; + + xe_assert(xe, vfid); + xe_assert(xe, xe_device_has_lmtt(xe)); + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_tile(tile, xe, id) { + err = xe_gt_sriov_pf_config_set_lmem_locked(tile->primary_gt, vfid, + vram_per_tile(tile, size)); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_vram() - Query VF's VRAM allocation. + * @xe: the PF &xe_device + * @vfid: the VF identifier (can't be 0 == PFID) + * @size: placeholder for the returned VRAM size + * + * Query VF's VRAM provisioning from all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_query_vf_vram(struct xe_device *xe, unsigned int vfid, u64 *size) +{ + struct xe_tile *tile; + unsigned int id; + u64 total = 0; + + xe_assert(xe, vfid); + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_tile(tile, xe, id) + total += xe_gt_sriov_pf_config_get_lmem_locked(tile->primary_gt, vfid); + + *size = total; + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h index bccf23d51396..f26f49539697 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -24,6 +24,10 @@ int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio); int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio); int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio); +int xe_sriov_pf_provision_bulk_apply_vram(struct xe_device *xe, u64 size); +int xe_sriov_pf_provision_apply_vf_vram(struct xe_device *xe, unsigned int vfid, u64 size); +int xe_sriov_pf_provision_query_vf_vram(struct xe_device *xe, unsigned int vfid, u64 *size); + int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c index 0b88cdade6f1..ffc3447fc8d7 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -9,6 +9,7 @@ #include #include "xe_assert.h" +#include "xe_device.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -44,7 +45,8 @@ static int emit_choice(char *buf, int choice, const char * const *array, size_t * ├── .bulk_profile * │ ├── exec_quantum_ms * │ ├── preempt_timeout_us - * │ └── sched_priority + * │ ├── sched_priority + * │ └── vram_quota * ├── pf/ * │ ├── ... 
* │ ├── device -> ../../../BDF @@ -59,7 +61,8 @@ static int emit_choice(char *buf, int choice, const char * const *array, size_t * │ └── profile * │ ├── exec_quantum_ms * │ ├── preempt_timeout_us - * │ └── sched_priority + * │ ├── sched_priority + * │ └── vram_quota * ├── vf2/ * : * └── vfN/ @@ -132,6 +135,7 @@ static XE_SRIOV_DEV_ATTR_WO(NAME) DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32); DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32); +DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(vram_quota, vram, u64); static const char * const sched_priority_names[] = { [GUC_SCHED_PRIORITY_LOW] = "low", @@ -181,12 +185,26 @@ static struct attribute *bulk_profile_dev_attrs[] = { &xe_sriov_dev_attr_exec_quantum_ms.attr, &xe_sriov_dev_attr_preempt_timeout_us.attr, &xe_sriov_dev_attr_sched_priority.attr, + &xe_sriov_dev_attr_vram_quota.attr, NULL }; +static umode_t profile_dev_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + if (attr == &xe_sriov_dev_attr_vram_quota.attr && + !xe_device_has_lmtt(vkobj->xe)) + return 0; + + return attr->mode; +} + static const struct attribute_group bulk_profile_dev_attr_group = { .name = ".bulk_profile", .attrs = bulk_profile_dev_attrs, + .is_visible = profile_dev_attr_is_visible, }; static const struct attribute_group *xe_sriov_dev_attr_groups[] = { @@ -228,6 +246,7 @@ static XE_SRIOV_VF_ATTR(NAME) DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n"); DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n"); +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(vram_quota, vram, u64, "%llu\n"); static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid, char *buf) @@ -274,6 +293,7 @@ static struct attribute *profile_vf_attrs[] = { &xe_sriov_vf_attr_exec_quantum_ms.attr, &xe_sriov_vf_attr_preempt_timeout_us.attr, 
&xe_sriov_vf_attr_sched_priority.attr, + &xe_sriov_vf_attr_vram_quota.attr, NULL }; @@ -286,6 +306,13 @@ static umode_t profile_vf_attr_is_visible(struct kobject *kobj, !sched_priority_change_allowed(vkobj->vfid)) return attr->mode & 0444; + if (attr == &xe_sriov_vf_attr_vram_quota.attr) { + if (!IS_DGFX(vkobj->xe) || vkobj->vfid == PFID) + return 0; + if (!xe_device_has_lmtt(vkobj->xe)) + return attr->mode & 0444; + } + return attr->mode; } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h index b0253e1ae5da..080cf10512f4 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -36,6 +36,9 @@ struct xe_sriov_metadata { * @XE_SRIOV_MODE_PF mode. */ struct xe_device_pf { + /** @admin_only: PF functionality focused on VFs management only. */ + bool admin_only; + /** @device_total_vfs: Maximum number of VFs supported by the device. */ u16 device_total_vfs; diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 734132eddda5..a2a2c0e936b8 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -6,10 +6,10 @@ #ifndef _XE_TILE_H_ #define _XE_TILE_H_ -#include "xe_device_types.h" +#include "xe_tile_types.h" +struct xe_device; struct xe_pagemap; -struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c index c9bac2cfdd04..24293521e090 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -14,173 +14,12 @@ #include "xe_tile_sriov_vf.h" #include "xe_wopcm.h" -static int vf_init_ggtt_balloons(struct xe_tile *tile) -{ - struct xe_ggtt *ggtt = tile->mem.ggtt; - - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - - tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) - return 
PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); - - tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { - xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); - return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); - } - - return 0; -} - -/** - * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range. - * @tile: the &xe_tile struct instance - * - * Return: 0 on success or a negative error code on failure. - */ -static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) -{ - u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base; - u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size; - struct xe_device *xe = tile_to_xe(tile); - u64 wopcm = xe_wopcm_size(xe); - u64 start, end; - int err; - - xe_tile_assert(tile, IS_SRIOV_VF(xe)); - xe_tile_assert(tile, ggtt_size); - lockdep_assert_held(&tile->mem.ggtt->lock); - - /* - * VF can only use part of the GGTT as allocated by the PF: - * - * WOPCM GUC_GGTT_TOP - * |<------------ Total GGTT size ------------------>| - * - * VF GGTT base -->|<- size ->| - * - * +--------------------+----------+-----------------+ - * |////////////////////| block |\\\\\\\\\\\\\\\\\| - * +--------------------+----------+-----------------+ - * - * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| - */ - - if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP || - ggtt_size > GUC_GGTT_TOP - ggtt_base) { - xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n", - tile->id, ggtt_base, ggtt_base + ggtt_size - 1); - return -ERANGE; - } - - start = wopcm; - end = ggtt_base; - if (end != start) { - err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0], - start, end); - if (err) - return err; - } - - start = ggtt_base + ggtt_size; - end = GUC_GGTT_TOP; - if (end != start) { - err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1], - start, end); - if (err) { - 
xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); - return err; - } - } - - return 0; -} - -static int vf_balloon_ggtt(struct xe_tile *tile) -{ - struct xe_ggtt *ggtt = tile->mem.ggtt; - int err; - - mutex_lock(&ggtt->lock); - err = xe_tile_sriov_vf_balloon_ggtt_locked(tile); - mutex_unlock(&ggtt->lock); - - return err; -} - -/** - * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes. - * @tile: the &xe_tile struct instance - */ -void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile) -{ - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - - xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); -} - -static void vf_deballoon_ggtt(struct xe_tile *tile) -{ - mutex_lock(&tile->mem.ggtt->lock); - xe_tile_sriov_vf_deballoon_ggtt_locked(tile); - mutex_unlock(&tile->mem.ggtt->lock); -} - -static void vf_fini_ggtt_balloons(struct xe_tile *tile) -{ - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - - xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); -} - -static void cleanup_ggtt(struct drm_device *drm, void *arg) -{ - struct xe_tile *tile = arg; - - vf_deballoon_ggtt(tile); - vf_fini_ggtt_balloons(tile); -} - -/** - * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. - * @tile: the &xe_tile - * - * This function is for VF use only. - * - * Return: 0 on success or a negative error code on failure. 
- */ -int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - int err; - - err = vf_init_ggtt_balloons(tile); - if (err) - return err; - - err = vf_balloon_ggtt(tile); - if (err) { - vf_fini_ggtt_balloons(tile); - return err; - } - - return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile); -} - /** * DOC: GGTT nodes shifting during VF post-migration recovery * * The first fixup applied to the VF KMD structures as part of post-migration * recovery is shifting nodes within &xe_ggtt instance. The nodes are moved * from range previously assigned to this VF, into newly provisioned area. - * The changes include balloons, which are resized accordingly. - * - * The balloon nodes are there to eliminate unavailable ranges from use: one - * reserves the GGTT area below the range for current VF, and another one - * reserves area above. * * Below is a GGTT layout of example VF, with a certain address range assigned to * said VF, and inaccessible areas above and below: @@ -198,10 +37,6 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) * * |<------- inaccessible for VF ------->||<-- inaccessible for VF ->| * - * GGTT nodes used for tracking allocations: - * - * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>| - * * After the migration, GGTT area assigned to the VF might have shifted, either * to lower or to higher address. But we expect the total size and extra areas to * be identical, as migration can only happen between matching platforms. @@ -219,37 +54,12 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) * So the VF has a new slice of GGTT assigned, and during migration process, the * memory content was copied to that new area. But the &xe_ggtt nodes are still * tracking allocations using the old addresses. The nodes within VF owned area - * have to be shifted, and balloon nodes need to be resized to properly mask out - * areas not owned by the VF. 
+ * have to be shifted, and the start offset for GGTT adjusted. * - * Fixed &xe_ggtt nodes used for tracking allocations: - * - * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->| - * - * Due to use of GPU profiles, we do not expect the old and new GGTT ares to + * Due to use of GPU profiles, we do not expect the old and new GGTT areas to * overlap; but our node shifting will fix addresses properly regardless. */ -/** - * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range. - * @tile: the &xe_tile struct instance - * @shift: the shift value - * - * Since Global GTT is not virtualized, each VF has an assigned range - * within the global space. This range might have changed during migration, - * which requires all memory addresses pointing to GGTT to be shifted. - */ -void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift) -{ - struct xe_ggtt *ggtt = tile->mem.ggtt; - - lockdep_assert_held(&ggtt->lock); - - xe_tile_sriov_vf_deballoon_ggtt_locked(tile); - xe_ggtt_shift_nodes_locked(ggtt, shift); - xe_tile_sriov_vf_balloon_ggtt_locked(tile); -} - /** * xe_tile_sriov_vf_lmem - VF LMEM configuration. 
* @tile: the &xe_tile @@ -330,7 +140,7 @@ u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile) xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - return config->ggtt_base; + return READ_ONCE(config->ggtt_base); } /** @@ -346,5 +156,5 @@ void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base) xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - config->ggtt_base = ggtt_base; + WRITE_ONCE(config->ggtt_base, ggtt_base); } diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h index 749f41504883..f2bbc4fc5734 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -10,9 +10,6 @@ struct xe_tile; -int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); -void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); -void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift); u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile); void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size); u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 0bfd28422dc2..510aa0ac4428 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -7,8 +7,8 @@ #include #include +#include "xe_device_types.h" #include "xe_pm.h" -#include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h new file mode 100644 index 000000000000..33932fd547d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_types.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022-2026 Intel Corporation + */ + +#ifndef _XE_TILE_TYPES_H_ +#define _XE_TILE_TYPES_H_ + +#include +#include + +#include "xe_lmtt_types.h" +#include "xe_memirq_types.h" +#include "xe_mert.h" +#include "xe_mmio_types.h" +#include "xe_tile_sriov_vf_types.h" + +#define 
tile_to_xe(tile__) \ + _Generic(tile__, \ + const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ + struct xe_tile * : (tile__)->xe) + +/** + * struct xe_tile - hardware tile structure + * + * From a driver perspective, a "tile" is effectively a complete GPU, containing + * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM. + * + * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI + * device and designate one "root" tile as being responsible for external PCI + * communication. PCI BAR0 exposes the GGTT and MMIO register space for each + * tile in a stacked layout, and PCI BAR2 exposes the local memory associated + * with each tile similarly. Device-wide interrupts can be enabled/disabled + * at the root tile, and the MSTR_TILE_INTR register will report which tiles + * have interrupts that need servicing. + */ +struct xe_tile { + /** @xe: Backpointer to tile's PCI device */ + struct xe_device *xe; + + /** @id: ID of the tile */ + u8 id; + + /** + * @primary_gt: Primary GT + */ + struct xe_gt *primary_gt; + + /** + * @media_gt: Media GT + * + * Only present on devices with media version >= 13. + */ + struct xe_gt *media_gt; + + /** + * @mmio: MMIO info for a tile. + * + * Each tile has its own 16MB space in BAR0, laid out as: + * * 0-4MB: registers + * * 4MB-8MB: reserved + * * 8MB-16MB: global GTT + */ + struct xe_mmio mmio; + + /** @mem: memory management info for tile */ + struct { + /** + * @mem.kernel_vram: kernel-dedicated VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct xe_vram_region *kernel_vram; + + /** + * @mem.vram: general purpose VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. 
+ */ + struct xe_vram_region *vram; + + /** @mem.ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; + + /** + * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated. + * + * Media GT shares a pool with its primary GT. + */ + struct xe_sa_manager *kernel_bb_pool; + + /** + * @mem.reclaim_pool: Pool for PRLs allocated. + * + * Only main GT has page reclaim list allocations. + */ + struct xe_sa_manager *reclaim_pool; + } mem; + + /** @sriov: tile level virtualization data */ + union { + struct { + /** @sriov.pf.lmtt: Local Memory Translation Table. */ + struct xe_lmtt lmtt; + } pf; + struct { + /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ + struct xe_ggtt_node *ggtt_balloon[2]; + /** @sriov.vf.self_config: VF configuration data */ + struct xe_tile_sriov_vf_selfconfig self_config; + } vf; + } sriov; + + /** @memirq: Memory Based Interrupts. */ + struct xe_memirq memirq; + + /** @csc_hw_error_work: worker to report CSC HW errors */ + struct work_struct csc_hw_error_work; + + /** @pcode: tile's PCODE */ + struct { + /** @pcode.lock: protecting tile's PCODE mailbox data */ + struct mutex lock; + } pcode; + + /** @migrate: Migration helper for vram blits and clearing */ + struct xe_migrate *migrate; + + /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ + struct kobject *sysfs; + + /** @debugfs: debugfs directory associated with this tile */ + struct dentry *debugfs; + + /** @mert: MERT-related data */ + struct xe_mert mert; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index f6d522dbdf86..933f30fb617d 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -41,11 +41,14 @@ static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence) static void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence) { + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); 
lockdep_assert_held(&fence->tlb_inval->pending_lock); list_del(&fence->link); + if (list_empty(&tlb_inval->pending_fences)) + cancel_delayed_work(&tlb_inval->fence_tdr); trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence); xe_tlb_inval_fence_fini(fence); dma_fence_signal(&fence->base); @@ -111,6 +114,16 @@ static void tlb_inval_fini(struct drm_device *drm, void *arg) xe_tlb_inval_reset(tlb_inval); } +static void primelockdep(struct xe_tlb_inval *tlb_inval) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&tlb_inval->seqno_lock); + fs_reclaim_release(GFP_KERNEL); +} + /** * xe_gt_tlb_inval_init_early() - Initialize TLB invalidation state * @gt: GT structure @@ -137,6 +150,8 @@ int xe_gt_tlb_inval_init_early(struct xe_gt *gt) if (err) return err; + primelockdep(tlb_inval); + tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm, "gt-tbl-inval-job-wq", WQ_MEM_RECLAIM); @@ -453,3 +468,21 @@ void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, dma_fence_get(&fence->base); fence->tlb_inval = tlb_inval; } + +/** + * xe_tlb_inval_idle() - Initialize TLB invalidation is idle + * @tlb_inval: TLB invalidation client + * + * Check the TLB invalidation seqno to determine if it is idle (i.e., no TLB + * invalidations are in flight). Expected to be called in the backend after the + * fence has been added to the pending list, and takes this into account. 
+ * + * Return: True if TLB invalidation client is idle, False otherwise + */ +bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval) +{ + lockdep_assert_held(&tlb_inval->seqno_lock); + + guard(spinlock_irq)(&tlb_inval->pending_lock); + return list_is_singular(&tlb_inval->pending_fences); +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h index 858d0690f995..62089254fa23 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -43,4 +43,6 @@ xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); +bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval); + #endif /* _XE_TLB_INVAL_ */ diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 2c44aa4b5562..5fd0d5506a7e 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -82,6 +82,9 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION; + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) + vres->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION; + if (place->fpfn || lpfn != man->size >> PAGE_SHIFT) vres->flags |= GPU_BUDDY_RANGE_ALLOCATION; @@ -111,25 +114,12 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, goto error_unlock; } - if (place->fpfn + (size >> PAGE_SHIFT) != lpfn && - place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_page_size = size; - - lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); - } - err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, (u64)lpfn << PAGE_SHIFT, size, min_page_size, &vres->blocks, vres->flags); if (err) goto error_unlock; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!gpu_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) - size = vres->base.size; - } - if (lpfn <= mgr->visible_size 
>> PAGE_SHIFT) { vres->used_visible_size = size; } else { diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 5766fa7742d3..f8de6a4bf189 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -10,10 +10,12 @@ #include #include +#include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" +#include "xe_sriov.h" #undef XE_REG_MCR #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) @@ -31,12 +33,12 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { /* Xe2 */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: L3 cache - media"), - XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, 3499)), XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, @@ -52,7 +54,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), @@ -89,6 +91,15 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(MEDIA_VERSION(2000)), XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) }, + + /* Xe3p */ + + { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED), + IS_INTEGRATED), + 
XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE, + BANK_HASH_4KB_MODE)) + }, }; static const struct xe_rtp_entry_sr engine_tunings[] = { @@ -107,9 +118,20 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) }, + { XE_RTP_NAME("Tuning: disable HW reporting of ctx switch to GHWSP"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, }; static const struct xe_rtp_entry_sr lrc_tunings[] = { + { XE_RTP_NAME("Tuning: Windower HW Filtering"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING)) + }, + /* DG2 */ { XE_RTP_NAME("Tuning: L3 cache"), @@ -184,7 +206,8 @@ void xe_tuning_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)); - xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), + >->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -196,7 +219,7 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) hwe->gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)); xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), - &hwe->reg_sr); + &hwe->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -215,7 +238,8 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)); - xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), + &hwe->reg_lrc, true); } /** diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3f63c2a7e86d..d9aa845a308d 100644 
--- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -13,6 +13,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_guc_pc.h" +#include "xe_guc_rc.h" #include "xe_guc_engine_activity.h" #include "xe_huc.h" #include "xe_sriov.h" @@ -214,6 +215,10 @@ int xe_uc_load_hw(struct xe_uc *uc) if (ret) goto err_out; + ret = xe_guc_rc_enable(&uc->guc); + if (ret) + goto err_out; + xe_guc_engine_activity_enable_stats(&uc->guc); /* We don't fail the driver load if HuC fails to auth */ @@ -242,11 +247,6 @@ int xe_uc_reset_prepare(struct xe_uc *uc) return xe_guc_reset_prepare(&uc->guc); } -void xe_uc_gucrc_disable(struct xe_uc *uc) -{ - XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc)); -} - void xe_uc_stop_prepare(struct xe_uc *uc) { xe_gsc_stop_prepare(&uc->gsc); diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 5398da1a8097..255a54a8f876 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -12,7 +12,6 @@ int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_load_hw(struct xe_uc *uc); -void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_runtime_resume(struct xe_uc *uc); void xe_uc_runtime_suspend(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a82e3a4fb389..548b0769b3ef 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1112,7 +1112,7 @@ static void vma_destroy_cb(struct dma_fence *fence, struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); INIT_WORK(&vma->destroy_work, vma_destroy_work_func); - queue_work(system_unbound_wq, &vma->destroy_work); + queue_work(system_dfl_wq, &vma->destroy_work); } static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) @@ -1474,6 +1474,20 @@ static void xe_vm_pt_destroy(struct xe_vm *vm) } } +static void xe_vm_init_prove_locking(struct xe_device 
*xe, struct xe_vm *vm) +{ + if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&vm->exec_queues.lock); + fs_reclaim_release(GFP_KERNEL); + + down_read(&vm->exec_queues.lock); + might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock); + up_read(&vm->exec_queues.lock); +} + struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; @@ -1529,11 +1543,16 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) INIT_WORK(&vm->destroy_work, vm_destroy_work_func); INIT_LIST_HEAD(&vm->preempt.exec_queues); + for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id) + INIT_LIST_HEAD(&vm->exec_queues.list[id]); if (flags & XE_VM_FLAG_FAULT_MODE) vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms; else vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms; + init_rwsem(&vm->exec_queues.lock); + xe_vm_init_prove_locking(xe, vm); + for_each_tile(tile, xe, id) xe_range_fence_tree_init(&vm->rftree[id]); @@ -1638,6 +1657,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) if (!vm->pt_root[id]) continue; + if (!xef) /* Not from userspace */ + create_flags |= EXEC_QUEUE_FLAG_KERNEL; + q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); @@ -1653,7 +1675,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); + &xe->usm.next_asid, GFP_NOWAIT); up_write(&xe->usm.lock); if (err < 0) goto err_close; @@ -1875,7 +1897,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); /* To destroy the VM we need to be able to sleep */ - queue_work(system_unbound_wq, &vm->destroy_work); + queue_work(system_dfl_wq, &vm->destroy_work); } struct xe_vm 
*xe_vm_lookup(struct xe_file *xef, u32 id) @@ -1919,7 +1941,8 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ DRM_XE_VM_CREATE_FLAG_LR_MODE | \ - DRM_XE_VM_CREATE_FLAG_FAULT_MODE) + DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \ + DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -1958,12 +1981,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && + args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)) + return -EINVAL; + if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) flags |= XE_VM_FLAG_LR_MODE; if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; + if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) + flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT; vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) @@ -2884,7 +2913,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm) && + xe_vm_allow_vm_eviction(vm) && res_evict, exec); } @@ -4571,3 +4600,52 @@ int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t r return xe_vm_alloc_vma(vm, &map_req, false); } +/** + * xe_vm_add_exec_queue() - Add exec queue to VM + * @vm: The VM. + * @q: The exec_queue + * + * Add exec queue to VM, skipped if the device does not have context based TLB + * invalidations. 
+ */ +void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + struct xe_device *xe = vm->xe; + + /* User VMs and queues only */ + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE)); + xe_assert(xe, vm->xef); + xe_assert(xe, vm == q->vm); + + if (!xe->info.has_ctx_tlb_inval) + return; + + down_write(&vm->exec_queues.lock); + list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]); + ++vm->exec_queues.count[q->gt->info.id]; + up_write(&vm->exec_queues.lock); +} + +/** + * xe_vm_remove_exec_queue() - Remove exec queue from VM + * @vm: The VM. + * @q: The exec_queue + * + * Remove exec queue from VM, skipped if the device does not have context based + * TLB invalidations. + */ +void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + if (!vm->xe->info.has_ctx_tlb_inval) + return; + + down_write(&vm->exec_queues.lock); + if (!list_empty(&q->vm_exec_queue_link)) { + list_del(&q->vm_exec_queue_link); + --vm->exec_queues.count[q->gt->info.id]; + } + up_write(&vm->exec_queues.lock); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 6cc98df47291..f849e369432b 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -220,6 +220,13 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm); } +static inline bool xe_vm_allow_vm_eviction(struct xe_vm *vm) +{ + return !xe_vm_in_lr_mode(vm) || + (xe_vm_in_fault_mode(vm) && + !(vm->flags & XE_VM_FLAG_NO_VM_OVERCOMMIT)); +} + int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); @@ -287,6 +294,9 @@ static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm) void xe_vm_kill(struct xe_vm *vm, bool 
unlocked); +void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); +void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); + /** * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. * @vm: The vm diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 437f64202f3b..1f6f7e30e751 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -232,6 +232,7 @@ struct xe_vm { #define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags) #define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id) #define XE_VM_FLAG_GSC BIT(8) +#define XE_VM_FLAG_NO_VM_OVERCOMMIT BIT(9) unsigned long flags; /** @@ -298,6 +299,22 @@ struct xe_vm { struct list_head pm_activate_link; } preempt; + /** @exec_queues: Manages list of exec queues attached to this VM, protected by lock. */ + struct { + /** + * @exec_queues.list: list of exec queues attached to this VM, + * per GT + */ + struct list_head list[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + /** + * @exec_queues.count: count of exec queues attached to this VM, + * per GT + */ + int count[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + /** @exec_queues.lock: lock to protect exec_queues list */ + struct rw_semaphore lock; + } exec_queues; + /** @um: unified memory state */ struct { /** @asid: address space ID, unique to each VM */ diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index 6f8281e0b96a..8717367ccd4c 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -5,9 +5,9 @@ #include #include +#include "xe_device_types.h" #include "xe_pcode.h" #include "xe_pcode_api.h" -#include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7b1bd79ab17..289acac2c3c8 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -111,6 +111,17 @@ 
* difference of how they are maintained in the code. In xe it uses the * xe_rtp infrastructure so the workarounds can be kept in tables, following * a more declarative approach rather than procedural. + * + * .. note:: + * When a workaround applies to every single known IP version in a range, + * the preferred handling is to use a single range-based RTP entry rather + * than individual entries for each version, even if some of the intermediate + * version numbers are currently unused. If a new intermediate IP version + * appears in the future and is enabled in the driver, any existing + * range-based entries that contain the new version number will need to be + * analyzed to determine whether their workarounds should apply to the new + * version, or whether any existing range based entries needs to be split + * into two entries that do not include the new intermediate version. */ #undef XE_REG_MCR @@ -120,6 +131,8 @@ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_rtp_entry_sr gt_was[] = { + /* Workarounds applying over a range of IPs */ + { XE_RTP_NAME("14011060649"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE), @@ -135,6 +148,22 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)), XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, 3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("14019449301"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000), ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), OR, + MEDIA_VERSION_RANGE(1301, 3500)), + 
XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, /* DG1 */ @@ -191,10 +220,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe_LPG */ - { XE_RTP_NAME("14015795083"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) - }, { XE_RTP_NAME("14018575942"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) @@ -206,12 +231,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe_LPM+ */ - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(1300), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("22016670082"), XE_RTP_RULES(MEDIA_VERSION(1300)), XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR)) @@ -225,42 +244,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(2000), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, - { XE_RTP_NAME("14019449301"), - XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, /* Xe2_HPG */ { XE_RTP_NAME("16025250150"), XE_RTP_RULES(GRAPHICS_VERSION(2001)), - XE_RTP_ACTIONS(SET(LSN_VC_REG2, - LSN_LNI_WGT(1) | - LSN_LNE_WGT(1) | - LSN_DIM_X_WGT(1) | - LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) - }, - - /* Xe2_HPM */ - - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(1301), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, - { XE_RTP_NAME("14019449301"), - XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), 
- XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + XE_RTP_ACTIONS(FIELD_SET(LSN_VC_REG2, + LSN_LNI_WGT_MASK | LSN_LNE_WGT_MASK | + LSN_DIM_X_WGT_MASK | LSN_DIM_Y_WGT_MASK | + LSN_DIM_Z_WGT_MASK, + LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | + LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | + LSN_DIM_Z_WGT(1))) }, /* Xe3_LPG */ @@ -272,43 +267,48 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe3_LPM */ - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(3000), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("16021865536"), - XE_RTP_RULES(MEDIA_VERSION(3000), + XE_RTP_RULES(MEDIA_VERSION_RANGE(3000, 3002), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("16021865536"), - XE_RTP_RULES(MEDIA_VERSION(3002), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(3002), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + + /* Xe3P_LPG */ + + { XE_RTP_NAME("14025160223"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D, + DIS_ATS_WRONLY_PG)) + }, + { XE_RTP_NAME("16028780921"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(CCCHKNREG2, LOCALITYDIS)) + }, + { XE_RTP_NAME("14026144927"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(L3SQCREG2, 
L3_SQ_DISABLE_COAMA_2WAY_COH | + L3_SQ_DISABLE_COAMA)) + }, + { XE_RTP_NAME("14025635424"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(GAMSTLB_CTRL2, STLB_SINGLE_BANK_MODE)) + }, { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005)), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) }, }; static const struct xe_rtp_entry_sr engine_was[] = { + /* Workarounds applying over a range of IPs */ + { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE), @@ -344,6 +344,54 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE), FFSC_PERCTX_PREEMPT_CTRL)) }, + { XE_RTP_NAME("18032247524"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) + }, + { XE_RTP_NAME("16018712365"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) + }, + { XE_RTP_NAME("14020338487"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) + }, + { XE_RTP_NAME("14018471104"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) + }, + /* + * Although this workaround isn't required for the RCS, disabling these + * reports has no impact for our driver or the GuC, so we go ahead and + * apply this to all engines for simplicity. 
+ */ + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), OR, + MEDIA_VERSION_RANGE(1301, 2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3000), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, /* TGL */ @@ -459,11 +507,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { ENGINE_CLASS(COMPUTE)), XE_RTP_ACTIONS(SET(RING_HWSTAM(RENDER_RING_BASE), ~0)) }, - { XE_RTP_NAME("14014999345"), - XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), - GRAPHICS_STEP(B0, C0)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC)) - }, /* Xe_LPG */ @@ -486,149 +529,36 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("18032247524"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) - }, - { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) - }, - { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) - }, { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */ 
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, - { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) - }, - /* - * These two workarounds are the same, just applying to different - * engines. Although Wa_18032095049 (for the RCS) isn't required on - * all steppings, disabling these reports has no impact for our - * driver or the GuC, so we go ahead and treat it the same as - * Wa_16021639441 which does apply to all steppings. - */ - { XE_RTP_NAME("18032095049, 16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION(2004)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, { XE_RTP_NAME("16018610683"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, - { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) - }, /* Xe2_HPG */ - { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) - }, { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2999), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, - { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - 
FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) - }, - { XE_RTP_NAME("18032247524"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) - }, - { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) - }, - /* - * Although this workaround isn't required for the RCS, disabling these - * reports has no impact for our driver or the GuC, so we go ahead and - * apply this to all engines for simplicity. - */ - { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, { XE_RTP_NAME("14019811474"), XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, { XE_RTP_NAME("14021821874, 14022954250"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, - { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) - }, - { XE_RTP_NAME("18041344222"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_not_sriov_vf), - FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), - XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) - 
}, - - /* Xe2_LPM */ - - { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(MEDIA_VERSION(2000)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - - /* Xe2_HPM */ - - { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(MEDIA_VERSION(1301)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, /* Xe3_LPG */ - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, { XE_RTP_NAME("18034896535"), XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), @@ -641,34 +571,33 @@ static const struct xe_rtp_entry_sr engine_was[] = { SMP_FORCE_128B_OVERFETCH)) }, { XE_RTP_NAME("14023061436"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), - FUNC(xe_rtp_match_first_render_or_compute), OR, - GRAPHICS_VERSION_RANGE(3003, 3005), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, - { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR, - GRAPHICS_VERSION_RANGE(3003, 3005), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) - }, { XE_RTP_NAME("16023105232"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR, GRAPHICS_VERSION_RANGE(2001, 3001)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + + /* Xe3p_LPG*/ + + { XE_RTP_NAME("22021149932"), + 
XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, SAMPLER_LD_LSC_DISABLE)) }, - { XE_RTP_NAME("18041344222"), - XE_RTP_RULES(GRAPHICS_VERSION(3000), - FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_not_sriov_vf), - FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), - XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + { XE_RTP_NAME("14025676848"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE)) + }, + { XE_RTP_NAME("16028951944"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN5, CPSS_AWARE_DIS)) }, }; @@ -706,6 +635,26 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(1200)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC)) }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, + { XE_RTP_NAME("14019386621"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) + }, + { XE_RTP_NAME("14019988906"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) + }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, /* DG1 */ @@ -742,10 
+691,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, /* PVC */ @@ -763,29 +708,9 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, /* Xe2_LPG */ - { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) - }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, - { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) - }, { XE_RTP_NAME("14021567978"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), ENGINE_CLASS(RENDER)), @@ -805,20 +730,9 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) - }, /* Xe2_HPG */ - { XE_RTP_NAME("15010599737"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) - }, - { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 
2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) - }, + { XE_RTP_NAME("14020756599"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) @@ -840,18 +754,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) - }, { XE_RTP_NAME("22021007897"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, - { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) - }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), @@ -877,6 +783,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("14026781792"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, DIS_TE_PATCH_CTRL)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { @@ -943,7 +853,8 @@ void xe_wa_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, ARRAY_SIZE(gt_was)); - xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), + >->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -961,7 +872,8 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, ARRAY_SIZE(engine_was)); - xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_was, 
ARRAY_SIZE(engine_was), + &hwe->reg_sr, false); } /** @@ -978,7 +890,8 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, ARRAY_SIZE(lrc_was)); - xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), + &hwe->reg_lrc, true); } /** diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 5cd7fa6d2a5c..80b54b195f20 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -2,14 +2,12 @@ 16010904313 GRAPHICS_VERSION_RANGE(1200, 1210) 18022495364 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) -14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) - PLATFORM(DG2) +14014475959 PLATFORM(DG2) 22011391025 PLATFORM(DG2) 22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) 22016596838 PLATFORM(PVC) 18020744125 PLATFORM(PVC) -1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) 22014953428 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) @@ -26,18 +24,9 @@ MEDIA_VERSION(2000) 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) -13011645652 GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3001) - GRAPHICS_VERSION(3003) - GRAPHICS_VERSION_RANGE(3004, 3005) -14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) - GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3005) -22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) - GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3001) - GRAPHICS_VERSION(3003) - GRAPHICS_VERSION_RANGE(3004, 3005) +13011645652 GRAPHICS_VERSION_RANGE(2004, 3005) +14022293748 GRAPHICS_VERSION_RANGE(2001, 3005) +22019794406 GRAPHICS_VERSION_RANGE(2001, 3005) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) @@ -54,22 +43,18 @@ 
18013179988 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 1274) 1508761755 GRAPHICS_VERSION(1255) - GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) -16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) - MEDIA_VERSION_RANGE(1301, 3000) - MEDIA_VERSION(3002) - GRAPHICS_VERSION_RANGE(3003, 3005) -16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) - MEDIA_VERSION_RANGE(1300, 3000) - MEDIA_VERSION(3002) - GRAPHICS_VERSION_RANGE(3003, 3005) +16023105232 GRAPHICS_VERSION_RANGE(2001, 3005) + MEDIA_VERSION_RANGE(1301, 3002) +16026508708 GRAPHICS_VERSION_RANGE(1200, 3005) + MEDIA_VERSION_RANGE(1300, 3002) 14020001231 GRAPHICS_VERSION_RANGE(2001,2004), FUNC(xe_rtp_match_psmi_enabled) - MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) - MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled) - MEDIA_VERSION(3002), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION_RANGE(2000, 3002), FUNC(xe_rtp_match_psmi_enabled) 16023683509 MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled) 15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) 16026007364 MEDIA_VERSION(3000) 14020316580 MEDIA_VERSION(1301) + +14025883347 MEDIA_VERSION_RANGE(1301, 3503) + GRAPHICS_VERSION_RANGE(2004, 3005) diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index dde4f4967ca3..900daf1d1b1b 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -55,8 +55,6 @@ #define MTL_WOPCM_SIZE SZ_4M #define WOPCM_SIZE SZ_2M -#define MAX_WOPCM_SIZE SZ_8M - /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ #define WOPCM_RESERVED_SIZE SZ_16K @@ -186,6 +184,14 @@ u32 xe_wopcm_size(struct xe_device *xe) WOPCM_SIZE; } +static u32 max_wopcm_size(struct xe_device *xe) +{ + if (xe->info.platform == XE_NOVALAKE_P) + return SZ_16M; + else + return SZ_8M; +} + /** * xe_wopcm_init() - Initialize the WOPCM structure. * @wopcm: pointer to xe_wopcm. 
@@ -227,8 +233,11 @@ int xe_wopcm_init(struct xe_wopcm *wopcm) * When the GuC wopcm base and size are preprogrammed by * BIOS/IFWI, check against the max allowed wopcm size to * validate if the programmed values align to the wopcm layout. + * + * FIXME: This is giving the maximum overall WOPCM size and not + * the size relative to each GT. */ - wopcm->size = MAX_WOPCM_SIZE; + wopcm->size = max_wopcm_size(xe); goto check; } diff --git a/include/drm/drm_suballoc.h b/include/drm/drm_suballoc.h index 7ba72a81a808..29befdda35d2 100644 --- a/include/drm/drm_suballoc.h +++ b/include/drm/drm_suballoc.h @@ -53,6 +53,12 @@ void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager, void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager); +struct drm_suballoc *drm_suballoc_alloc(gfp_t gfp); + +int drm_suballoc_insert(struct drm_suballoc_manager *sa_manager, + struct drm_suballoc *sa, size_t size, bool intr, + size_t align); + struct drm_suballoc * drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, gfp_t gfp, bool intr, size_t align); diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 52520e684ab1..33b91cb2e684 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -900,4 +900,16 @@ #define INTEL_CRI_IDS(MACRO__, ...) \ MACRO__(0x674C, ## __VA_ARGS__) +/* NVL-P */ +#define INTEL_NVLP_IDS(MACRO__, ...) 
\ + MACRO__(0xD750, ## __VA_ARGS__), \ + MACRO__(0xD751, ## __VA_ARGS__), \ + MACRO__(0xD752, ## __VA_ARGS__), \ + MACRO__(0xD753, ## __VA_ARGS__), \ + MACRO__(0XD754, ## __VA_ARGS__), \ + MACRO__(0XD755, ## __VA_ARGS__), \ + MACRO__(0XD756, ## __VA_ARGS__), \ + MACRO__(0XD757, ## __VA_ARGS__), \ + MACRO__(0xD75F, ## __VA_ARGS__) + #endif /* __PCIIDS_H__ */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 26ca00c325d9..d5af2b7f577b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, @@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag return -ENOSYS; } +static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) + __releases(ptl) +{ + WARN_ON_ONCE(1); + + spin_unlock(ptl); +} + #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 077e66a682e2..ef2565048bdf 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -335,10 +335,6 @@ struct drm_xe_mem_region { __u64 total_size; /** * @used: Estimate of the memory used in bytes for this region. - * - * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable - * accounting. Without this the value here will always equal - * zero. */ __u64 used; /** @@ -363,9 +359,7 @@ struct drm_xe_mem_region { * @cpu_visible_used: Estimate of CPU visible memory used, in * bytes. * - * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable - * accounting. 
Without this the value here will always equal - * zero. Note this is only currently tracked for + * Note this is only currently tracked for * DRM_XE_MEM_REGION_CLASS_VRAM regions (for other types the value * here will always be zero). */ @@ -975,6 +969,11 @@ struct drm_xe_gem_mmap_offset { * demand when accessed, and also allows per-VM overcommit of memory. * The xe driver internally uses recoverable pagefaults to implement * this. + * - %DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT - Requires also + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE. This disallows per-VM overcommit + * but only during a &DRM_IOCTL_XE_VM_BIND operation with the + * %DRM_XE_VM_BIND_FLAG_IMMEDIATE flag set. This may be useful for + * user-space naively probing the amount of available memory. */ struct drm_xe_vm_create { /** @extensions: Pointer to the first extension struct, if any */ @@ -983,6 +982,7 @@ struct drm_xe_vm_create { #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (1 << 0) #define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1) #define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2) +#define DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT (1 << 3) /** @flags: Flags */ __u32 flags; diff --git a/mm/filemap.c b/mm/filemap.c index 6cd7974d4ada..406cef06b684 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1379,14 +1379,16 @@ repeat: #ifdef CONFIG_MIGRATION /** - * migration_entry_wait_on_locked - Wait for a migration entry to be removed - * @entry: migration swap entry. + * softleaf_entry_wait_on_locked - Wait for a migration entry or + * device_private entry to be removed. + * @entry: migration or device_private swap entry. * @ptl: already locked ptl. This function will drop the lock. * - * Wait for a migration entry referencing the given page to be removed. This is + * Wait for a migration entry referencing the given page, or device_private + * entry referencing a device_private page to be unlocked. 
This is * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except * this can be called without taking a reference on the page. Instead this - * should be called while holding the ptl for the migration entry referencing + * should be called while holding the ptl for @entry referencing * the page. * * Returns after unlocking the ptl. @@ -1394,7 +1396,7 @@ repeat: * This follows the same logic as folio_wait_bit_common() so see the comments * there. */ -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; @@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) * If a migration entry exists for the page the migration path must hold * a valid reference to the page, and it must take the ptl to remove the * migration entry. So the page is valid until the ptl is dropped. + * Similarly any path attempting to drop the last reference to a + * device-private page needs to grab the ptl to remove the device-private + * entry. 
*/ spin_unlock(ptl); diff --git a/mm/memory.c b/mm/memory.c index 07778814b4a8..2f815a34d924 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4763,7 +4763,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(vmf->page); put_page(vmf->page); } else { - pte_unmap_unlock(vmf->pte, vmf->ptl); + pte_unmap(vmf->pte); + softleaf_entry_wait_on_locked(entry, vmf->ptl); } } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate.c b/mm/migrate.c index 1bf2cf8c44dd..2c3d489ecf51 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -500,7 +500,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, if (!softleaf_is_migration(entry)) goto out; - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; out: spin_unlock(ptl); @@ -532,10 +532,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p * If migration entry existed, safe to release vma lock * here because the pgtable page won't be freed without the * pgtable lock released. See comment right above pgtable - * lock release in migration_entry_wait_on_locked(). + * lock release in softleaf_entry_wait_on_locked(). 
*/ hugetlb_vma_unlock_read(vma); - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; } @@ -553,7 +553,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) ptl = pmd_lock(mm, pmd); if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); + softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 0a8b31939640..8079676c8f1f 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, } if (softleaf_is_migration(entry)) { - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; }