From nobody Sat Feb 7 08:44:59 2026 Received: from rtg-sunil-navi33.amd.com (unknown [165.204.156.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 492CA6FBF for ; Thu, 21 Mar 2024 04:38:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=165.204.156.251 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1710995937; cv=none; b=HTH7KJvNi6Z3MpVdecslZ/3O750nC84WyiJnJKiz8r92PesklmF31vdexHLOHTzmE+sG6dq+vGdIbUXBIfs7tz9SkoY21ggFzBirFkDbo+QlKljzIX81v0k1gqGnP2uSNtFwZYlicSZFaC166f3xFAuQXlxrZ/wk3RhZsluDvsM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1710995937; c=relaxed/simple; bh=Nn1uD2K4r5jp5Bga3hSJV8WonZTp2/zoiODnAY4zZAI=; h=From:To:Cc:Subject:Date:Message-Id:MIME-Version; b=PCNI/TWHscR8w+yptsT274fFOVK3jaE32f3VSZOriWUqx5lQDa361Uh8i71BZCKSjjwy7IT4AMr8Qi11SXQbAoO/RKqN1emOiHPAHOYaxxEjI+W48l5++IgyGGhoZL8mLcs8b4sSZ3KOSLbS6sA9qGKIHW4hasmsdk9TY2RSqNM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=quarantine dis=none) header.from=amd.com; spf=none smtp.mailfrom=rtg-sunil-navi33.amd.com; arc=none smtp.client-ip=165.204.156.251 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=quarantine dis=none) header.from=amd.com Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=rtg-sunil-navi33.amd.com Received: from rtg-sunil-navi33.amd.com (localhost [127.0.0.1]) by rtg-sunil-navi33.amd.com (8.15.2/8.15.2/Debian-22ubuntu3) with ESMTP id 42L4ckVb1056537; Thu, 21 Mar 2024 10:08:46 +0530 Received: (from sunil@localhost) by rtg-sunil-navi33.amd.com (8.15.2/8.15.2/Submit) id 42L4ckAn1056536; Thu, 21 Mar 2024 10:08:46 +0530 From: Sunil Khatri To: Alex Deucher , =?UTF-8?q?Christian=20K=C3=B6nig?= , Shashank Sharma Cc: amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, Hawking Zhang , 
Felix Kuehling , Lijo Lazar , Sunil Khatri , Ivan Lipski Subject: [PATCH v2] drm/amdgpu: refactor code to split devcoredump code Date: Thu, 21 Mar 2024 10:08:43 +0530 Message-Id: <20240321043843.1056505-1-sunil.khatri@amd.com> X-Mailer: git-send-email 2.34.1 Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Refactor the devcoredump code into new files: its functionality is being expanded further, so it is better to split it out and give devcoredump its own file. v2: Fix the build failure caught by the arm compiler (implicit function declaration) by guarding with #ifdef Cc: Ivan Lipski Signed-off-by: Sunil Khatri --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 218 ++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 47 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 191 --------------- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 16 -- 6 files changed, 270 insertions(+), 208 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdg= pu/Makefile index 535e3936cfe0..1f6b56ec99f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -81,7 +81,7 @@ amdgpu-y +=3D amdgpu_device.o amdgpu_doorbell_mgr.o amdgp= u_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_cor= edump.o =20 amdgpu-$(CONFIG_PROC_FS) +=3D amdgpu_fdinfo.o =20 diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu= /drm/amd/amdgpu/amdgpu_dev_coredump.c new file mode 100644 index 000000000000..f3a0f5857598 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software= "), + * to deal in the Software without restriction, including without limitati= on + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included= in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS= OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include + +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else + +#include +#include "amdgpu_dev_coredump.h" + +const char *hw_ip_names[MAX_HWIP] =3D { + [GC_HWIP] =3D "GC", + [HDP_HWIP] =3D "HDP", + [SDMA0_HWIP] =3D "SDMA0", + [SDMA1_HWIP] =3D "SDMA1", + [SDMA2_HWIP] =3D "SDMA2", + [SDMA3_HWIP] =3D "SDMA3", + [SDMA4_HWIP] =3D "SDMA4", + [SDMA5_HWIP] =3D "SDMA5", + [SDMA6_HWIP] =3D "SDMA6", + [SDMA7_HWIP] =3D "SDMA7", + [LSDMA_HWIP] =3D "LSDMA", + [MMHUB_HWIP] =3D "MMHUB", + [ATHUB_HWIP] =3D "ATHUB", + [NBIO_HWIP] =3D "NBIO", + [MP0_HWIP] =3D "MP0", + [MP1_HWIP] =3D "MP1", + [UVD_HWIP] =3D "UVD/JPEG/VCN", + [VCN1_HWIP] =3D "VCN1", + [VCE_HWIP] =3D "VCE", + [VPE_HWIP] =3D "VPE", + [DF_HWIP] =3D "DF", + [DCE_HWIP] =3D "DCE", + [OSSSYS_HWIP] =3D "OSSSYS", + [SMUIO_HWIP] =3D "SMUIO", + [PWR_HWIP] =3D "PWR", + [NBIF_HWIP] =3D "NBIF", + [THM_HWIP] =3D "THM", + [CLK_HWIP] =3D "CLK", + [UMC_HWIP] =3D "UMC", + [RSMU_HWIP] =3D "RSMU", + [XGMI_HWIP] =3D "XGMI", + [DCI_HWIP] =3D "DCI", + [PCIE_HWIP] =3D "PCIE", +}; + +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump =3D data; + struct drm_print_iterator iter; + struct amdgpu_vm_fault_info *fault_info; + int i, ver; + + iter.data =3D buffer; + iter.offset =3D 0; + iter.start =3D offset; + iter.remain =3D count; + + p =3D drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + 
coredump->reset_task_info.pid); + + /* GPU IP's information of the SOC */ + drm_printf(&p, "\nIP Information\n"); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external= _rev_id); + + for (int i =3D 1; i < MAX_HWIP; i++) { + for (int j =3D 0; j < HWIP_MAX_INSTANCE; j++) { + ver =3D coredump->adev->ip_versions[i][j]; + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); + } + } + + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + + /* Add page fault information */ + fault_info =3D &coredump->adev->vm_manager.fault_info; + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? 
"mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info= ->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->= status); + + /* Add ring buffer information */ + drm_printf(&p, "Ring buffer information\n"); + for (int i =3D 0; i < coredump->adev->num_rings; i++) { + int j =3D 0; + struct amdgpu_ring *ring =3D coredump->adev->rings[i]; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + amdgpu_ring_get_rptr(ring), + amdgpu_ring_get_wptr(ring), + ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + while (j < ring->ring_size) { + drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j/4]); + j +=3D 4; + } + } + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i =3D 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev =3D adev_to_drm(adev); + struct amdgpu_job *job =3D reset_context->job; + struct drm_sched_job *s_job; + + coredump =3D kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost =3D vram_lost; + + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm =3D 
reset_context->job->vm; + + ti =3D amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info =3D *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job =3D &job->base; + coredump->ring =3D to_amdgpu_ring(s_job->sched); + } + + coredump->adev =3D adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu= /drm/amd/amdgpu/amdgpu_dev_coredump.h new file mode 100644 index 000000000000..52459512cb2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software= "), + * to deal in the Software without restriction, including without limitati= on + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included= in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS= OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_DEV_COREDUMP_H__ +#define __AMDGPU_DEV_COREDUMP_H__ + +#include "amdgpu.h" +#include "amdgpu_reset.h" + +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; + struct amdgpu_ring *ring; +}; +#endif + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/a= md/amdgpu/amdgpu_device.c index 3204b8f6edeb..95028f57cb56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -75,6 +75,10 @@ #include "amdgpu_reset.h" #include "amdgpu_virt.h" =20 +#ifdef CONFIG_DEV_COREDUMP +#include "amdgpu_dev_coredump.h" +#endif + #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/am= d/amdgpu/amdgpu_reset.c index 3398f2a368d5..ea4873f6ccd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,50 +21,11 @@ * */ =20 -#include -#include - #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" #include "smu_v13_0_10.h" =20 -const char *hw_ip_names[MAX_HWIP] =3D { - [GC_HWIP] =3D "GC", - [HDP_HWIP] =3D "HDP", - [SDMA0_HWIP] =3D "SDMA0", - [SDMA1_HWIP] =3D "SDMA1", - [SDMA2_HWIP] =3D "SDMA2", - [SDMA3_HWIP] =3D "SDMA3", - [SDMA4_HWIP] =3D "SDMA4", - [SDMA5_HWIP] =3D "SDMA5", - [SDMA6_HWIP] =3D "SDMA6", - [SDMA7_HWIP] =3D "SDMA7", - [LSDMA_HWIP] =3D "LSDMA", - [MMHUB_HWIP] =3D "MMHUB", - [ATHUB_HWIP] =3D "ATHUB", - [NBIO_HWIP] =3D "NBIO", - [MP0_HWIP] =3D "MP0", - [MP1_HWIP] =3D "MP1", - [UVD_HWIP] =3D "UVD/JPEG/VCN", - [VCN1_HWIP] =3D "VCN1", - [VCE_HWIP] =3D "VCE", - [VPE_HWIP] =3D "VPE", - [DF_HWIP] =3D "DF", - [DCE_HWIP] =3D "DCE", - [OSSSYS_HWIP] =3D "OSSSYS", - 
[SMUIO_HWIP] =3D "SMUIO", - [PWR_HWIP] =3D "PWR", - [NBIF_HWIP] =3D "NBIF", - [THM_HWIP] =3D "THM", - [CLK_HWIP] =3D "CLK", - [UMC_HWIP] =3D "UMC", - [RSMU_HWIP] =3D "RSMU", - [XGMI_HWIP] =3D "XGMI", - [DCI_HWIP] =3D "DCI", - [PCIE_HWIP] =3D "PCIE", -}; - int amdgpu_reset_init(struct amdgpu_device *adev) { int ret =3D 0; @@ -197,155 +158,3 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_= reset_domain *reset_domain) atomic_set(&reset_domain->in_gpu_reset, 0); up_write(&reset_domain->sem); } - -#ifndef CONFIG_DEV_COREDUMP -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump =3D data; - struct drm_print_iterator iter; - struct amdgpu_vm_fault_info *fault_info; - int i, ver; - - iter.data =3D buffer; - iter.offset =3D 0; - iter.start =3D offset; - iter.remain =3D count; - - p =3D drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, - coredump->reset_time.tv_nsec); - - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - /* GPU IP's information of the SOC */ - drm_printf(&p, "\nIP Information\n"); - drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); - drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); - drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external= _rev_id); - - for (int i =3D 1; i < MAX_HWIP; i++) { - for (int j =3D 0; j < HWIP_MAX_INSTANCE; j++) { - ver =3D coredump->adev->ip_versions[i][j]; - if 
(ver) - drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", - hw_ip_names[i], i, j, - IP_VERSION_MAJ(ver), - IP_VERSION_MIN(ver), - IP_VERSION_REV(ver), - IP_VERSION_VARIANT(ver), - IP_VERSION_SUBREV(ver)); - } - } - - if (coredump->ring) { - drm_printf(&p, "\nRing timed out details\n"); - drm_printf(&p, "IP Type: %d Ring Name: %s\n", - coredump->ring->funcs->type, - coredump->ring->name); - } - - /* Add page fault information */ - fault_info =3D &coredump->adev->vm_manager.fault_info; - drm_printf(&p, "\n[%s] Page fault observed\n", - fault_info->vmhub ? "mmhub" : "gfxhub"); - drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info= ->addr); - drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->= status); - - /* Add ring buffer information */ - drm_printf(&p, "Ring buffer information\n"); - for (int i =3D 0; i < coredump->adev->num_rings; i++) { - int j =3D 0; - struct amdgpu_ring *ring =3D coredump->adev->rings[i]; - - drm_printf(&p, "ring name: %s\n", ring->name); - drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", - amdgpu_ring_get_rptr(ring), - amdgpu_ring_get_wptr(ring), - ring->buf_mask); - drm_printf(&p, "Ring size in dwords: %d\n", - ring->ring_size / 4); - drm_printf(&p, "Ring contents\n"); - drm_printf(&p, "Offset \t Value\n"); - - while (j < ring->ring_size) { - drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j/4]); - j +=3D 4; - } - } - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i =3D 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -void amdgpu_coredump(struct amdgpu_device *adev, bool 
vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev =3D adev_to_drm(adev); - struct amdgpu_job *job =3D reset_context->job; - struct drm_sched_job *s_job; - - coredump =3D kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; - } - - coredump->reset_vram_lost =3D vram_lost; - - if (reset_context->job && reset_context->job->vm) { - struct amdgpu_task_info *ti; - struct amdgpu_vm *vm =3D reset_context->job->vm; - - ti =3D amdgpu_vm_get_task_info_vm(vm); - if (ti) { - coredump->reset_task_info =3D *ti; - amdgpu_vm_put_task_info(ti); - } - } - - if (job) { - s_job =3D &job->base; - coredump->ring =3D to_amdgpu_ring(s_job->sched); - } - - coredump->adev =3D adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/am= d/amdgpu/amdgpu_reset.h index 60522963aaca..66125d43cf21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -88,19 +88,6 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; =20 -#ifdef CONFIG_DEV_COREDUMP - -#define AMDGPU_COREDUMP_VERSION "1" - -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; - struct amdgpu_ring *ring; -}; -#endif - int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); =20 @@ -141,9 +128,6 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_rese= t_domain *reset_domain); =20 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_d= omain); =20 -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context); - 
#define for_each_handler(i, handler, reset_ctl) \ for (i =3D 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ (handler =3D (*reset_ctl->reset_handlers)[i]); \ --=20 2.34.1