[PATCH v2] ACPI: APEI: fix missing erst record id

Liu Xinpeng posted 1 patch 4 years, 2 months ago
There is a newer version of this series
drivers/acpi/apei/erst-dbg.c |  4 +++-
drivers/acpi/apei/erst.c     | 34 +++++++++++++++++++++++++++++++---
include/acpi/apei.h          |  1 +
3 files changed, 35 insertions(+), 4 deletions(-)
[PATCH v2] ACPI: APEI: fix missing erst record id
Posted by Liu Xinpeng 4 years, 2 months ago
If a record_id is in the erst_record_id_cache but not in storage,
erst_read() returns -ENOENT and the caller jumps to retry_next, where
erst_get_record_id_next() skips a record_id. This can result in
printing only the records that are in the cache.

A reproducer of the problem (retry many times):

[root@localhost erst-inject]# ./erst-inject -c 0xaaaaa00011
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000006
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000007
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000008
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -n
total error record count: 6

Signed-off-by: Liu Xinpeng <liuxp11@chinatelecom.cn>
---
 drivers/acpi/apei/erst-dbg.c |  4 +++-
 drivers/acpi/apei/erst.c     | 34 +++++++++++++++++++++++++++++++---
 include/acpi/apei.h          |  1 +
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index c740f0faad39..5b8164280a17 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -113,8 +113,10 @@ static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
 retry:
 	rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
 	/* The record may be cleared by others, try read next record */
-	if (rc == -ENOENT)
+	if (rc == -ENOENT) {
+		erst_clear_cache(id);
 		goto retry_next;
+	}
 	if (rc < 0)
 		goto out;
 	if (len > ERST_DBG_RECORD_LEN_MAX) {
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 698d67cee052..07d69dc7fd62 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -856,6 +856,31 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 }
 EXPORT_SYMBOL_GPL(erst_read);
 
+int erst_clear_cache(u64 record_id)
+{
+	int rc, i;
+	u64 *entries;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (rc)
+		return rc;
+
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == record_id)
+			entries[i] = APEI_ERST_INVALID_RECORD_ID;
+	}
+	__erst_record_id_cache_compact();
+
+	mutex_unlock(&erst_record_id_cache.lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear_cache);
+
 int erst_clear(u64 record_id)
 {
 	int rc, i;
@@ -998,14 +1023,17 @@ static ssize_t erst_reader(struct pstore_record *record)
 
 	len = erst_read(record_id, &rcd->hdr, rcd_len);
 	/* The record may be cleared by others, try read next record */
-	if (len == -ENOENT)
+	if (len == -ENOENT) {
+		erst_clear_cache(record_id);
 		goto skip;
-	else if (len < 0 || len < sizeof(*rcd)) {
+	} else if (len < 0 || len < sizeof(*rcd)) {
 		rc = -EIO;
 		goto out;
 	}
-	if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
+	if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE)) {
+		erst_clear_cache(record_id);
 		goto skip;
+	}
 
 	record->buf = kmalloc(len, GFP_KERNEL);
 	if (record->buf == NULL) {
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index afaca3a075e8..f8c11ff4115a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -47,6 +47,7 @@ void erst_get_record_id_end(void);
 ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 		  size_t buflen);
 int erst_clear(u64 record_id);
+int erst_clear_cache(u64 record_id);
 
 int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
 void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
-- 
2.23.0
Re: [PATCH v2] ACPI: APEI: fix missing erst record id
Posted by Rafael J. Wysocki 4 years, 2 months ago
APEI reviewers, your input is needed here.

On Tue, Apr 5, 2022 at 8:14 AM Liu Xinpeng <liuxp11@chinatelecom.cn> wrote:
>
> record_id is in the erst_record_id_cache but not in storage,so
> erst_read will return -ENOENT, and then goto retry_next,
> erst_get_record_id_next skip a record_id. This can result in
> printing the records just in the cache.
>
> A reproducer of the problem(retry many times):
>
> [root@localhost erst-inject]# ./erst-inject -c 0xaaaaa00011
> [root@localhost erst-inject]# ./erst-inject -p
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00012
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00013
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00014
> [root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000006
> [root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000007
> [root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000008
> [root@localhost erst-inject]# ./erst-inject -p
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00012
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00013
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00014
> [root@localhost erst-inject]# ./erst-inject -n
> total error record count: 6
>
> Signed-off-by: Liu Xinpeng <liuxp11@chinatelecom.cn>
> ---
>  drivers/acpi/apei/erst-dbg.c |  4 +++-
>  drivers/acpi/apei/erst.c     | 34 +++++++++++++++++++++++++++++++---
>  include/acpi/apei.h          |  1 +
>  3 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
> index c740f0faad39..5b8164280a17 100644
> --- a/drivers/acpi/apei/erst-dbg.c
> +++ b/drivers/acpi/apei/erst-dbg.c
> @@ -113,8 +113,10 @@ static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
>  retry:
>         rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
>         /* The record may be cleared by others, try read next record */
> -       if (rc == -ENOENT)
> +       if (rc == -ENOENT) {
> +               erst_clear_cache(id);
>                 goto retry_next;
> +       }
>         if (rc < 0)
>                 goto out;
>         if (len > ERST_DBG_RECORD_LEN_MAX) {
> diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
> index 698d67cee052..07d69dc7fd62 100644
> --- a/drivers/acpi/apei/erst.c
> +++ b/drivers/acpi/apei/erst.c
> @@ -856,6 +856,31 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
>  }
>  EXPORT_SYMBOL_GPL(erst_read);
>
> +int erst_clear_cache(u64 record_id)
> +{
> +       int rc, i;
> +       u64 *entries;
> +
> +       if (erst_disable)
> +               return -ENODEV;
> +
> +       rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
> +       if (rc)
> +               return rc;
> +
> +       entries = erst_record_id_cache.entries;
> +       for (i = 0; i < erst_record_id_cache.len; i++) {
> +               if (entries[i] == record_id)
> +                       entries[i] = APEI_ERST_INVALID_RECORD_ID;
> +       }
> +       __erst_record_id_cache_compact();
> +
> +       mutex_unlock(&erst_record_id_cache.lock);
> +
> +       return rc;
> +}
> +EXPORT_SYMBOL_GPL(erst_clear_cache);
> +
>  int erst_clear(u64 record_id)
>  {
>         int rc, i;
> @@ -998,14 +1023,17 @@ static ssize_t erst_reader(struct pstore_record *record)
>
>         len = erst_read(record_id, &rcd->hdr, rcd_len);
>         /* The record may be cleared by others, try read next record */
> -       if (len == -ENOENT)
> +       if (len == -ENOENT) {
> +               erst_clear_cache(record_id);
>                 goto skip;
> -       else if (len < 0 || len < sizeof(*rcd)) {
> +       } else if (len < 0 || len < sizeof(*rcd)) {
>                 rc = -EIO;
>                 goto out;
>         }
> -       if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
> +       if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE)) {
> +               erst_clear_cache(record_id);
>                 goto skip;
> +       }
>
>         record->buf = kmalloc(len, GFP_KERNEL);
>         if (record->buf == NULL) {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index afaca3a075e8..f8c11ff4115a 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -47,6 +47,7 @@ void erst_get_record_id_end(void);
>  ssize_t erst_read(u64 record_id, struct cper_record_header *record,
>                   size_t buflen);
>  int erst_clear(u64 record_id);
> +int erst_clear_cache(u64 record_id);
>
>  int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
>  void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
> --
> 2.23.0
>