From nobody Sat Nov 23 13:49:54 2024 Received: from frasgout.his.huawei.com (frasgout.his.huawei.com [185.176.79.56]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7C2F51A9B54; Wed, 20 Nov 2024 10:00:35 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=185.176.79.56 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732096837; cv=none; b=QZcNxIKBnIqFeCKG2Q897+D6/OdpVq2jv//QNW/8BBWPQz5K032eChR9NgSHrqwGTSxzZd0PM6jHmNBs7qH5CmO9jQcK54sH6MQ88DHoBxhdEo1w12/6OItzdzjSdEuqWR7qWtL8X9Exz2B4hT6h2NtOFKq8QXj415fUfna6PJA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732096837; c=relaxed/simple; bh=8tow5TpsCCDUPIdZt8gavZ1OoctIto5lKBkIKyiDrOA=; h=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=ekZa4y/SPBI5sN/6qMlE4zzGlLJkSE45dDN3+w0+aL2Qkkb8W6ruGroNgPxA0vDEjC/Jx0qYWeQLUZuQY0qh7GMbv0c8sfGVbwoeq8rb8/X5NCK/qYd35/EWi4OnsDpnlBlGwYhHcMcXDDeyptWK5e47An0aNAfDBu3qwfHPelc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=huawei.com; spf=pass smtp.mailfrom=huawei.com; arc=none smtp.client-ip=185.176.79.56 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=quarantine dis=none) header.from=huawei.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=huawei.com Received: from mail.maildlp.com (unknown [172.18.186.216]) by frasgout.his.huawei.com (SkyGuard) with ESMTP id 4XtcHr73f1z6K98N; Wed, 20 Nov 2024 17:58:16 +0800 (CST) Received: from frapeml500007.china.huawei.com (unknown [7.182.85.172]) by mail.maildlp.com (Postfix) with ESMTPS id 88BA0140119; Wed, 20 Nov 2024 18:00:33 +0800 (CST) Received: from P_UKIT01-A7bmah.china.huawei.com (10.195.247.212) by frapeml500007.china.huawei.com (7.182.85.172) with Microsoft SMTP Server (version=TLS1_2, 
cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.1.2507.39; Wed, 20 Nov 2024 11:00:32 +0100 From: To: , , , , , , , , , , CC: , , , , Subject: [PATCH 12/13] rasdaemon: ras-mc-ctl: Update logging of CXL DRAM event data to align with CXL spec rev 3.1 Date: Wed, 20 Nov 2024 09:59:22 +0000 Message-ID: <20241120095923.1891-13-shiju.jose@huawei.com> X-Mailer: git-send-email 2.43.0.windows.1 In-Reply-To: <20241120095923.1891-1-shiju.jose@huawei.com> References: <20241120095923.1891-1-shiju.jose@huawei.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-ClientProxiedBy: lhrpeml100001.china.huawei.com (7.191.160.183) To frapeml500007.china.huawei.com (7.182.85.172) Content-Type: text/plain; charset="utf-8" From: Shiju Jose CXL spec 3.1 section 8.2.9.2.1.2 Table 8-46, DRAM Event Record has been updated with the following new fields and new types for Memory Event Type, Transaction Type and Validity Flags fields. 1. Component Identifier 2. Sub-channel 3. Advanced Programmable Corrected Memory Error Threshold Event Flags 4. Corrected Volatile Memory Error Count at Event 5. Memory Event Sub-Type This update modifies ras-mc-ctl to parse and log CXL DRAM event data stored in the RAS SQLite database table, reflecting the specification changes introduced in revision 3.1. Example output, ./util/ras-mc-ctl --errors ... 
CXL DRAM events: 1 2024-11-20 00:18:53 +0000 error: memdev=3Dmem0, host=3D0000:0f:00.0, seri= al=3D0x3, \ log=3DInformational, hdr_uuid=3D00000000-0000-0000-0000-000000000000, \ hdr_flags=3D0x1, , hdr_handle=3D0x1, hdr_related_handle=3D0x0, \ hdr_timestamp=3D1970-01-01 00:00:58 +0000, hdr_length=3D128, hdr_maint_op_c= lass=3D1, \ hdr_maint_op_sub_class=3D3, dpa=3D0x18680, dpa_flags: , \ descriptor_flags: 'UNCORRECTABLE EVENT' , 'THRESHOLD EVENT' , \ memory event type: Data Path Error, memory event sub type: Media Link CRC E= rror, \ transaction_type: Internal Media Scrub, channel=3D3, sub_channel=3D0, rank= =3D17, \ nibble_mask=3D3866802, bank_group=3D7, bank=3D11, row=3D2, column=3D77, \ correction_mask:21 00 00 00 00 00 00 00 2c 00 00 00 00 00 00 00 37 00 00 \ 00 00 00 00 00 42 00 00 00 00 00 00 00 hpa=3D0xffffffffffffffff, \ region_uuid=3D00000000-0000-0000-0000-000000000000, \ component_id:01 74 c5 08 9a 1a 0b fc d2 7e 2f 31 9b 3c 81 4d \ pldm_entity_id:74 c5 08 9a 1a 0b pldm_resource_id:00 00 00 00 \ cme_threshold_ev_flags: 'Corrected Memory Errors in Multiple Media Componen= ts' , \ 'Exceeded Programmable Threshold' , cvme_count=3D0x94,=20 ... 
Signed-off-by: Shiju Jose Reviewed-by: Jonathan Cameron --- util/ras-mc-ctl.in | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in index f8efa91..dbb1607 100755 --- a/util/ras-mc-ctl.in +++ b/util/ras-mc-ctl.in @@ -1402,14 +1402,17 @@ sub get_cxl_der_mem_event_type { my @types; =20 - if ($_[0] < 0 || $_[0] > 3) { + if ($_[0] < 0 || $_[0] > 6) { return "unknown-type"; } =20 @types =3D ("Media ECC Error", "Scrub Media ECC Error", "Invalid Address", - "Data Path Error"); + "Data Path Error", + "TE State Violation", + "Advanced Programmable CME Counter Expiration", + "CKID Violation"); =20 return $types[$_[0]]; } @@ -1819,7 +1822,7 @@ sub errors my ($dpa_flags, $descriptor, $mem_event_type, $mem_event_sub_type, $tr= ansaction_type, $channel, $rank, $device, $comp_id, $pldm_entity_id, $pldm_= res_id); my ($nibble_mask, $bank_group, $row, $column, $cor_mask); my ($event_type, $health_status, $media_status, $life_used, $dirty_shu= tdown_cnt, $cor_vol_err_cnt, $cor_per_err_cnt, $device_temp, $add_status); - my ($cme_threshold_ev_flags, $cme_count); + my ($sub_channel, $cme_threshold_ev_flags, $cme_count, $cvme_count); =20 my $dbh =3D DBI->connect("dbi:SQLite:dbname=3D$dbname", "", "", {}); =20 @@ -2089,10 +2092,10 @@ sub errors =20 # CXL DRAM errors use constant CXL_EVENT_DER_CORRECTION_MASK_SIZE =3D> 0x20; - $query =3D "select id, timestamp, memdev, host, serial, log_type, hdr_uui= d, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint= _op_class, dpa, dpa_flags, descriptor, type, transaction_type, channel, ran= k, nibble_mask, bank_group, bank, row, column, cor_mask, hpa, region, regio= n_uuid, hdr_maint_op_sub_class from cxl_dram_event$conf{opt}{since} order b= y id"; + $query =3D "select id, timestamp, memdev, host, serial, log_type, hdr_uui= d, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint= _op_class, dpa, dpa_flags, descriptor, type, 
transaction_type, channel, ran= k, nibble_mask, bank_group, bank, row, column, cor_mask, hpa, region, regio= n_uuid, hdr_maint_op_sub_class, comp_id, pldm_entity_id, pldm_resource_id, = sub_type, sub_channel, cme_threshold_ev_flags, cvme_count from cxl_dram_eve= nt$conf{opt}{since} order by id"; $query_handle =3D $dbh->prepare($query); $query_handle->execute(); - $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $= log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts,= $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $type, $t= ransaction_type, $channel, $rank, $nibble_mask, $bank_group, $bank, $row, $= column, $cor_mask, $hpa, $region, $region_uuid, $hdr_maint_op_sub_class)); + $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $= log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts,= $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $type, $t= ransaction_type, $channel, $rank, $nibble_mask, $bank_group, $bank, $row, $= column, $cor_mask, $hpa, $region, $region_uuid, $hdr_maint_op_sub_class, $c= omp_id, $pldm_entity_id, $pldm_res_id, $mem_event_sub_type, $sub_channel, $= cme_threshold_ev_flags, $cvme_count)); $out =3D ""; while($query_handle->fetch()) { $out .=3D "$id $timestamp error: "; @@ -2112,8 +2115,10 @@ sub errors $out .=3D sprintf "dpa_flags: %s, ", get_cxl_dpa_flags_text($dpa_flag= s) if (defined $dpa_flags && length $dpa_flags); $out .=3D sprintf "descriptor_flags: %s, ", get_cxl_descriptor_flags_= text($descriptor) if (defined $descriptor && length $descriptor); $out .=3D sprintf "memory event type: %s, ", get_cxl_der_mem_event_ty= pe($type) if (defined $type && length $type); + $out .=3D sprintf "memory event sub type: %s, ", get_cxl_mem_event_su= b_type($mem_event_sub_type) if (defined $mem_event_sub_type && length $mem_= event_sub_type); $out .=3D sprintf "transaction_type: %s, ", get_cxl_transaction_type(= $transaction_type) if 
(defined $transaction_type && length $transaction_typ= e); $out .=3D sprintf "channel=3D%u, ", $channel if (defined $channel && = length $channel); + $out .=3D sprintf "sub_channel=3D%u, ", $sub_channel if (defined $sub= _channel && length $sub_channel); $out .=3D sprintf "rank=3D%u, ", $rank if (defined $rank && length $r= ank); $out .=3D sprintf "nibble_mask=3D%u, ", $nibble_mask if (defined $nib= ble_mask && length $nibble_mask); $out .=3D sprintf "bank_group=3D%u, ", $bank_group if (defined $bank_= group && length $bank_group); @@ -2130,6 +2135,17 @@ sub errors $out .=3D sprintf "hpa=3D0x%llx, ", $hpa if (defined $hpa && l= ength $hpa); $out .=3D "region=3D$region, " if (defined $region && length $= region); $out .=3D "region_uuid=3D$region_uuid, " if (defined $region_u= uid && length $region_uuid); + if (defined $comp_id && length $comp_id) { + print_cxl_dev_id("component_id", $comp_id, CXL_EVENT_GEN_M= ED_COMP_ID_SIZE, $out); + } + if (defined $pldm_entity_id && length $pldm_entity_id) { + print_cxl_dev_id("pldm_entity_id", $pldm_entity_id, CXL_EV= ENT_GEN_PLDM_ENTITY_ID_SIZE, $out); + } + if (defined $pldm_res_id && length $pldm_res_id) { + print_cxl_dev_id("pldm_resource_id", $pldm_res_id, CXL_EVE= NT_GEN_PLDM_RES_ID_SIZE, $out); + } + $out .=3D sprintf "cme_threshold_ev_flags: %s, ", get_cxl_cme_= threshold_ev_flags_text($cme_threshold_ev_flags) if (defined $cme_threshold= _ev_flags && length $cme_threshold_ev_flags); + $out .=3D sprintf "cvme_count=3D0x%x, ", $cvme_count if (defin= ed $cvme_count && length $cvme_count); $out .=3D "\n"; } if ($out ne "") { --=20 2.43.0