From nobody Thu Apr 2 12:41:27 2026 Received: from angie.orcam.me.uk (angie.orcam.me.uk [78.133.224.34]) by smtp.subspace.kernel.org (Postfix) with ESMTP id ED46233DEF7; Sat, 28 Mar 2026 15:49:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=78.133.224.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774713001; cv=none; b=l71QjHO5nEslsPHuzqTowGC6r2F8iYiL4C9ttBjSesCMqMPzpME+6vzBAjDUMN3JaeruUSVuNjgtNniuuHrr4CEFnHbMga2kEVUWQQPASOyFXfkSghcDyYcE+nQIIBsggC+Doi+hVdJ/Riv4+c8iXHPVAjVjXNAg79y93KUzL+E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774713001; c=relaxed/simple; bh=WFn5ooLwSf5MqA9ihfEYpV1xJZmOEde7I0+xg7wZrOc=; h=Date:From:To:cc:Subject:In-Reply-To:Message-ID:References: MIME-Version:Content-Type; b=GKN/fL+w0F9JaCBlwNpHROZrATEQjplO1ktm++Emo2qoamYsbrRX9Uv3/suWUZ4T1TpfAiLw6lLfI+e66tNCEIX3SCr+kjCS+zDYNoOuWkvFeNsI5l7/VDWgRULFRYPetfiN5cVcbyXcN59FV2bGw6lwEPE/rbyN8mhsivbIJGs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=orcam.me.uk; spf=none smtp.mailfrom=orcam.me.uk; arc=none smtp.client-ip=78.133.224.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=orcam.me.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=orcam.me.uk Received: by angie.orcam.me.uk (Postfix, from userid 500) id DE39692009C; Sat, 28 Mar 2026 16:49:57 +0100 (CET) Received: from localhost (localhost [127.0.0.1]) by angie.orcam.me.uk (Postfix) with ESMTP id D819092009B; Sat, 28 Mar 2026 15:49:57 +0000 (GMT) Date: Sat, 28 Mar 2026 15:49:57 +0000 (GMT) From: "Maciej W. Rozycki" To: Thomas Bogendoerfer cc: linux-mips@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 1/3] MIPS: DEC: Rate-limit memory errors for ECC systems In-Reply-To: Message-ID: References: User-Agent: Alpine 2.21 (DEB 202 2017-01-01) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Prevent the system from becoming unusable due to a flood of memory error=20 messages with DECstation and DECsystem models using ECC, that is KN02,=20 KN03 and KN05 systems. It seems common for gradual oxidation of memory=20 module contacts to cause memory errors to eventually develop and while=20 ECC takes care of correcting them and the system affected can continue=20 operating normally until the contacts have been cleaned, the unlimited=20 messages make the system spend all its time on producing them, therefore=20 preventing it from being used. Rate-limiting removes the load from the system and enables its normal=20 operation, e.g.: Bus error interrupt: CPU memory read ECC error at 0x139cfb04 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU partial memory write ECC error at 0x138c1f5c ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU partial memory write ECC error at 0x138c1f6c ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x139cff64 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af00c ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af044 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af0cc ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af0cc ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af0e4 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af104 ECC syndrome 0x54 -- corrected single bit error at data bit D3 dec_ecc_be_backend: 34455 callbacks suppressed Signed-off-by: Maciej W. Rozycki --- arch/mips/dec/ecc-berr.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) linux-mips-dec-berr-ratelimit-ecc.diff Index: linux-macro/arch/mips/dec/ecc-berr.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-macro.orig/arch/mips/dec/ecc-berr.c +++ linux-macro/arch/mips/dec/ecc-berr.c @@ -5,12 +5,13 @@ * 5000/240 (KN03), 5000/260 (KN05) and DECsystem 5900 (KN03), * 5900/260 (KN05) systems. * - * Copyright (c) 2003, 2005 Maciej W. Rozycki + * Copyright (c) 2003, 2005, 2026 Maciej W. Rozycki */ =20 #include #include #include +#include #include #include =20 @@ -51,6 +52,10 @@ static int dec_ecc_be_backend(struct pt_ static const char overstr[] =3D "overrun"; static const char eccstr[] =3D "ECC error"; =20 + static DEFINE_RATELIMIT_STATE(rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + const char *kind, *agent, *cycle, *event; const char *status =3D "", *xbit =3D "", *fmt =3D ""; unsigned long address; @@ -70,7 +75,7 @@ static int dec_ecc_be_backend(struct pt_ =20 if (!(erraddr & KN0X_EAR_VALID)) { /* No idea what happened. */ - printk(KERN_ALERT "Unidentified bus error %s\n", kind); + pr_alert_ratelimited("Unidentified bus error %s\n", kind); return action; } =20 @@ -180,12 +185,13 @@ static int dec_ecc_be_backend(struct pt_ } } =20 - if (action !=3D MIPS_BE_FIXUP) + if (action !=3D MIPS_BE_FIXUP && __ratelimit(&rs)) { printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n", kind, agent, cycle, event, address); =20 - if (action !=3D MIPS_BE_FIXUP && erraddr & KN0X_EAR_ECCERR) - printk(fmt, " ECC syndrome ", syn, status, xbit, i); + if (erraddr & KN0X_EAR_ECCERR) + printk(fmt, " ECC syndrome ", syn, status, xbit, i); + } =20 return action; } From nobody Thu Apr 2 12:41:27 2026 Received: from angie.orcam.me.uk (angie.orcam.me.uk [78.133.224.34]) by smtp.subspace.kernel.org (Postfix) with ESMTP id E969A387582; Sat, 28 Mar 2026 15:50:02 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=78.133.224.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774713004; cv=none; b=PMaPf/eSTz/I/HICN7VvhfN79rm05ORYnpgjiCaKU+DuAdGr5WhMgdm7UdM3g23qWDANZwC/ObjhALBGS40ecirk4FuKC6cd6xX1VlkeArinhymzzO/ZmX56z3SwbbKRwTLQcpfBZokMQwxhqzv6GWC5e97lNUEECqNJ4pvf+m8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774713004; c=relaxed/simple; bh=z43/IEmW/ES6bq+gVhx4qcIV7gcTgXnq2zS0b43QB/8=; h=Date:From:To:cc:Subject:In-Reply-To:Message-ID:References: MIME-Version:Content-Type; b=aT+vCTKRptb8fwv5T829X/VDxtX/aBO6BN4jf3y6Sa+0p1jbmD4RbgzRgNF7Cuphw6r0R7YNrmfNu/KtvPbHYBpGru4Gz9a/NK13ixu1o+2/vPkNJgR61LqVJ7LFahGKjN/ODsuNltCIgFICgaI3ttCSFs1lyFxZTHnPLStO5IA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=orcam.me.uk; spf=none smtp.mailfrom=orcam.me.uk; arc=none smtp.client-ip=78.133.224.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=orcam.me.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=orcam.me.uk Received: by angie.orcam.me.uk (Postfix, from userid 500) id 8630C92009D; Sat, 28 Mar 2026 16:50:01 +0100 (CET) Received: from localhost (localhost [127.0.0.1]) by angie.orcam.me.uk (Postfix) with ESMTP id 83CD292009B; Sat, 28 Mar 2026 15:50:01 +0000 (GMT) Date: Sat, 28 Mar 2026 15:50:01 +0000 (GMT) From: "Maciej W. Rozycki" To: Thomas Bogendoerfer cc: linux-mips@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 2/3] MIPS: DEC: Rate-limit memory errors for KN01 systems In-Reply-To: Message-ID: References: User-Agent: Alpine 2.21 (DEB 202 2017-01-01) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Similarly to memory errors in ECC systems also rate-limit memory parity=20 errors for KN01 DECstation and DECsystem models. Unlike with ECC these=20 events are always fatal and are less likely to cause a message flood,=20 but handle them the same way for consistency. Signed-off-by: Maciej W. Rozycki --- arch/mips/dec/kn01-berr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) linux-mips-dec-berr-ratelimit-kn01.diff Index: linux-macro/arch/mips/dec/kn01-berr.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-macro.orig/arch/mips/dec/kn01-berr.c +++ linux-macro/arch/mips/dec/kn01-berr.c @@ -4,7 +4,7 @@ * and 2100 (KN01) systems equipped with parity error detection * logic. * - * Copyright (c) 2005 Maciej W. Rozycki + * Copyright (c) 2005, 2026 Maciej W. Rozycki */ =20 #include @@ -134,8 +134,8 @@ static int dec_kn01_be_backend(struct pt action =3D MIPS_BE_FIXUP; =20 if (action !=3D MIPS_BE_FIXUP) - printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n", - kind, agent, cycle, event, address); + pr_alert_ratelimited("Bus error %s: %s %s %s at %#010lx\n", + kind, agent, cycle, event, address); =20 return action; } From nobody Thu Apr 2 12:41:27 2026 Received: from angie.orcam.me.uk (angie.orcam.me.uk [78.133.224.34]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 0D94E34889C; Sat, 28 Mar 2026 15:50:06 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=78.133.224.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774713009; cv=none; b=nveKZZxnvzmGJT3pWVmaYOTJ87JxRG5Gmpl0KrpVSsD3VOQl7PFoCk2xnJOs00LrHqs+CoUbm3yFZ5e6aZhdWUkvO5diUmAOl977y+ez6ePXAuodharSqohYHGdPXjYzeVkY630GifHCylWgO8QJxjDMpk+5jhQ0IQpccs3ntgs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774713009; c=relaxed/simple; bh=7QqrdrR9mDzQY48hShkFWod0E5CztNim8nORaozBYNE=; h=Date:From:To:cc:Subject:In-Reply-To:Message-ID:References: MIME-Version:Content-Type; b=H+uH9nbYKZkcC+rpu0I52SjyOwW/9/Z4Z91CbC8SANlJuljLnHbV4Iw+UbUQ2tr/+JG+sYE6pipOnVkQlIOR9tdyJNQL8m7lMYRRSb334gGdQCh+9jCX6U7HvfWjg69KbluQ5++GpHI2mESL7zeuQSpuIwp7kYgxrl+3F0vnSr8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=orcam.me.uk; spf=none smtp.mailfrom=orcam.me.uk; arc=none smtp.client-ip=78.133.224.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=orcam.me.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=orcam.me.uk Received: by angie.orcam.me.uk (Postfix, from userid 500) id 166B592009D; Sat, 28 Mar 2026 16:50:06 +0100 (CET) Received: from localhost (localhost [127.0.0.1]) by angie.orcam.me.uk (Postfix) with ESMTP id 1017292009C; Sat, 28 Mar 2026 15:50:06 +0000 (GMT) Date: Sat, 28 Mar 2026 15:50:05 +0000 (GMT) From: "Maciej W. Rozycki" To: Thomas Bogendoerfer cc: linux-mips@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 3/3] MIPS: DEC: Rate-limit memory errors for non-KN01 parity systems In-Reply-To: Message-ID: References: User-Agent: Alpine 2.21 (DEB 202 2017-01-01) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Similarly to memory errors in ECC systems also rate-limit memory parity=20 errors for KN02-BA, KN02-CA, KN04-BA, KN04-CA DECstation and DECsystem=20 models. Unlike with ECC these events are always fatal and are less=20 likely to cause a message flood, but handle them the same way for=20 consistency. Signed-off-by: Maciej W. Rozycki --- arch/mips/dec/kn02xa-berr.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) linux-mips-dec-berr-ratelimit-kn02xa.diff Index: linux-macro/arch/mips/dec/kn02xa-berr.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-macro.orig/arch/mips/dec/kn02xa-berr.c +++ linux-macro/arch/mips/dec/kn02xa-berr.c @@ -6,12 +6,13 @@ * DECstation/DECsystem 5000/20, /25, /33 (KN02-CA), 5000/50 * (KN04-CA) systems. * - * Copyright (c) 2005 Maciej W. Rozycki + * Copyright (c) 2005, 2026 Maciej W. Rozycki */ =20 #include #include #include +#include #include =20 #include @@ -50,6 +51,10 @@ static int dec_kn02xa_be_backend(struct static const char paritystr[] =3D "parity error"; static const char lanestat[][4] =3D { " OK", "BAD" }; =20 + static DEFINE_RATELIMIT_STATE(rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + const char *kind, *agent, *cycle, *event; unsigned long address; =20 @@ -79,18 +84,19 @@ static int dec_kn02xa_be_backend(struct if (is_fixup) action =3D MIPS_BE_FIXUP; =20 - if (action !=3D MIPS_BE_FIXUP) + if (action !=3D MIPS_BE_FIXUP && __ratelimit(&rs)) { printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n", kind, agent, cycle, event, address); =20 - if (action !=3D MIPS_BE_FIXUP && address < 0x10000000) - printk(KERN_ALERT " Byte lane status %#3x -- " - "#3: %s, #2: %s, #1: %s, #0: %s\n", - (mer & KN02XA_MER_BYTERR) >> 8, - lanestat[(mer & KN02XA_MER_BYTERR_3) !=3D 0], - lanestat[(mer & KN02XA_MER_BYTERR_2) !=3D 0], - lanestat[(mer & KN02XA_MER_BYTERR_1) !=3D 0], - lanestat[(mer & KN02XA_MER_BYTERR_0) !=3D 0]); + if (address < 0x10000000) + printk(KERN_ALERT " Byte lane status %#3x -- " + "#3: %s, #2: %s, #1: %s, #0: %s\n", + (mer & KN02XA_MER_BYTERR) >> 8, + lanestat[(mer & KN02XA_MER_BYTERR_3) !=3D 0], + lanestat[(mer & KN02XA_MER_BYTERR_2) !=3D 0], + lanestat[(mer & KN02XA_MER_BYTERR_1) !=3D 0], + lanestat[(mer & KN02XA_MER_BYTERR_0) !=3D 0]); + } =20 return action; }