[PATCH] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block

Narayana Murty N posted 1 patch 2 months, 1 week ago
arch/powerpc/kernel/eeh.c | 198 +++++++++++++++++++-------------------
1 file changed, 99 insertions(+), 99 deletions(-)
[PATCH] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block
Posted by Narayana Murty N 2 months, 1 week ago
Makes pseries_eeh_err_inject() available even when debugfs
is disabled (CONFIG_DEBUG_FS=n). It moves eeh_debugfs_break_device()
and eeh_pe_inject_mmio_error() out of the CONFIG_DEBUG_FS block
and renames it as eeh_break_device().

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202409170509.VWC6jadC-lkp@intel.com/
Fixes: b0e2b828dfca ("powerpc/pseries/eeh: Fix pseries_eeh_err_inject")
Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 198 +++++++++++++++++++-------------------
 1 file changed, 99 insertions(+), 99 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 49ab11a287a3..0fe25e907ea6 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1574,6 +1574,104 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 }
 #endif /* CONFIG_PROC_FS */
 
+static int eeh_break_device(struct pci_dev *pdev)
+{
+	struct resource *bar = NULL;
+	void __iomem *mapped;
+	u16 old, bit;
+	int i, pos;
+
+	/* Do we have an MMIO BAR to disable? */
+	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+		struct resource *r = &pdev->resource[i];
+
+		if (!r->flags || !r->start)
+			continue;
+		if (r->flags & IORESOURCE_IO)
+			continue;
+		if (r->flags & IORESOURCE_UNSET)
+			continue;
+
+		bar = r;
+		break;
+	}
+
+	if (!bar) {
+		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+		return -ENXIO;
+	}
+
+	pci_err(pdev, "Going to break: %pR\n", bar);
+
+	if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+		return -ENXIO;
+#else
+		/*
+		 * VFs don't have a per-function COMMAND register, so the best
+		 * we can do is clear the Memory Space Enable bit in the PF's
+		 * SRIOV control reg.
+		 *
+		 * Unfortunately, this requires that we have a PF (i.e doesn't
+		 * work for a passed-through VF) and it has the potential side
+		 * effect of also causing an EEH on every other VF under the
+		 * PF. Oh well.
+		 */
+		pdev = pdev->physfn;
+		if (!pdev)
+			return -ENXIO; /* passed through VFs have no PF */
+
+		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+		pos += PCI_SRIOV_CTRL;
+		bit  = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+	} else {
+		bit = PCI_COMMAND_MEMORY;
+		pos = PCI_COMMAND;
+	}
+
+	/*
+	 * Process here is:
+	 *
+	 * 1. Disable Memory space.
+	 *
+	 * 2. Perform an MMIO to the device. This should result in an error
+	 *    (CA  / UR) being raised by the device which results in an EEH
+	 *    PE freeze. Using the in_8() accessor skips the eeh detection hook
+	 *    so the freeze hook so the EEH Detection machinery won't be
+	 *    triggered here. This is to match the usual behaviour of EEH
+	 *    where the HW will asynchronously freeze a PE and it's up to
+	 *    the kernel to notice and deal with it.
+	 *
+	 * 3. Turn Memory space back on. This is more important for VFs
+	 *    since recovery will probably fail if we don't. For normal
+	 *    the COMMAND register is reset as a part of re-initialising
+	 *    the device.
+	 *
+	 * Breaking stuff is the point so who cares if it's racy ;)
+	 */
+	pci_read_config_word(pdev, pos, &old);
+
+	mapped = ioremap(bar->start, PAGE_SIZE);
+	if (!mapped) {
+		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+		return -ENXIO;
+	}
+
+	pci_write_config_word(pdev, pos, old & ~bit);
+	in_8(mapped);
+	pci_write_config_word(pdev, pos, old);
+
+	iounmap(mapped);
+
+	return 0;
+}
+
+int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
+{
+	return eeh_break_device(pdev);
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 
@@ -1727,99 +1825,6 @@ static const struct file_operations eeh_dev_check_fops = {
 	.read   = eeh_debugfs_dev_usage,
 };
 
-static int eeh_debugfs_break_device(struct pci_dev *pdev)
-{
-	struct resource *bar = NULL;
-	void __iomem *mapped;
-	u16 old, bit;
-	int i, pos;
-
-	/* Do we have an MMIO BAR to disable? */
-	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
-		struct resource *r = &pdev->resource[i];
-
-		if (!r->flags || !r->start)
-			continue;
-		if (r->flags & IORESOURCE_IO)
-			continue;
-		if (r->flags & IORESOURCE_UNSET)
-			continue;
-
-		bar = r;
-		break;
-	}
-
-	if (!bar) {
-		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
-		return -ENXIO;
-	}
-
-	pci_err(pdev, "Going to break: %pR\n", bar);
-
-	if (pdev->is_virtfn) {
-#ifndef CONFIG_PCI_IOV
-		return -ENXIO;
-#else
-		/*
-		 * VFs don't have a per-function COMMAND register, so the best
-		 * we can do is clear the Memory Space Enable bit in the PF's
-		 * SRIOV control reg.
-		 *
-		 * Unfortunately, this requires that we have a PF (i.e doesn't
-		 * work for a passed-through VF) and it has the potential side
-		 * effect of also causing an EEH on every other VF under the
-		 * PF. Oh well.
-		 */
-		pdev = pdev->physfn;
-		if (!pdev)
-			return -ENXIO; /* passed through VFs have no PF */
-
-		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
-		pos += PCI_SRIOV_CTRL;
-		bit  = PCI_SRIOV_CTRL_MSE;
-#endif /* !CONFIG_PCI_IOV */
-	} else {
-		bit = PCI_COMMAND_MEMORY;
-		pos = PCI_COMMAND;
-	}
-
-	/*
-	 * Process here is:
-	 *
-	 * 1. Disable Memory space.
-	 *
-	 * 2. Perform an MMIO to the device. This should result in an error
-	 *    (CA  / UR) being raised by the device which results in an EEH
-	 *    PE freeze. Using the in_8() accessor skips the eeh detection hook
-	 *    so the freeze hook so the EEH Detection machinery won't be
-	 *    triggered here. This is to match the usual behaviour of EEH
-	 *    where the HW will asynchronously freeze a PE and it's up to
-	 *    the kernel to notice and deal with it.
-	 *
-	 * 3. Turn Memory space back on. This is more important for VFs
-	 *    since recovery will probably fail if we don't. For normal
-	 *    the COMMAND register is reset as a part of re-initialising
-	 *    the device.
-	 *
-	 * Breaking stuff is the point so who cares if it's racy ;)
-	 */
-	pci_read_config_word(pdev, pos, &old);
-
-	mapped = ioremap(bar->start, PAGE_SIZE);
-	if (!mapped) {
-		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
-		return -ENXIO;
-	}
-
-	pci_write_config_word(pdev, pos, old & ~bit);
-	in_8(mapped);
-	pci_write_config_word(pdev, pos, old);
-
-	iounmap(mapped);
-
-	return 0;
-}
-
 static ssize_t eeh_dev_break_write(struct file *filp,
 				const char __user *user_buf,
 				size_t count, loff_t *ppos)
@@ -1831,7 +1836,7 @@ static ssize_t eeh_dev_break_write(struct file *filp,
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
 
-	ret = eeh_debugfs_break_device(pdev);
+	ret = eeh_break_device(pdev);
 	pci_dev_put(pdev);
 
 	if (ret < 0)
@@ -1847,11 +1852,6 @@ static const struct file_operations eeh_dev_break_fops = {
 	.read   = eeh_debugfs_dev_usage,
 };
 
-int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
-{
-	return eeh_debugfs_break_device(pdev);
-}
-
 static ssize_t eeh_dev_can_recover(struct file *filp,
 				   const char __user *user_buf,
 				   size_t count, loff_t *ppos)
-- 
2.39.2
Re: [PATCH] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block
Posted by Michael Ellerman 2 months, 1 week ago
On Tue, 17 Sep 2024 09:24:45 -0400, Narayana Murty N wrote:
> Makes pseries_eeh_err_inject() available even when debugfs
> is disabled (CONFIG_DEBUG_FS=n). It moves eeh_debugfs_break_device()
> and eeh_pe_inject_mmio_error() out of the CONFIG_DEBUG_FS block
> and renames it as eeh_break_device().
> 
> 

Applied to powerpc/fixes.

[1/1] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block
      https://git.kernel.org/powerpc/c/3af2e2f68cc6baf0a11f662d30b0bf981f77bfea

cheers
Re: [PATCH] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block
Posted by Ritesh Harjani (IBM) 2 months, 1 week ago
Narayana Murty N <nnmlinux@linux.ibm.com> writes:

> Makes pseries_eeh_err_inject() available even when debugfs
> is disabled (CONFIG_DEBUG_FS=n). It moves eeh_debugfs_break_device()
> and eeh_pe_inject_mmio_error() out of the CONFIG_DEBUG_FS block
> and renames it as eeh_break_device().
>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202409170509.VWC6jadC-lkp@intel.com/
> Fixes: b0e2b828dfca ("powerpc/pseries/eeh: Fix pseries_eeh_err_inject")
> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
> ---
>  arch/powerpc/kernel/eeh.c | 198 +++++++++++++++++++-------------------
>  1 file changed, 99 insertions(+), 99 deletions(-)

Ok, so in your original patch you implemented eeh_inject ops for pseries
using mmio based eeh error injection (eeh_pe_inject_mmio_error()), which
uses the functions defined under debugfs -> eeh_debugfs_break_device(). 

This was failing when CONFIG_DEBUGFS is not defined, thus referring to
undefined function definition. 

Minor nit below.

>
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 49ab11a287a3..0fe25e907ea6 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1574,6 +1574,104 @@ static int proc_eeh_show(struct seq_file *m, void *v)
>  }
>  #endif /* CONFIG_PROC_FS */
>  
> +static int eeh_break_device(struct pci_dev *pdev)
> +{
> +	struct resource *bar = NULL;
> +	void __iomem *mapped;
> +	u16 old, bit;
> +	int i, pos;
> +
> +	/* Do we have an MMIO BAR to disable? */
> +	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
> +		struct resource *r = &pdev->resource[i];
> +
> +		if (!r->flags || !r->start)
> +			continue;
> +		if (r->flags & IORESOURCE_IO)
> +			continue;
> +		if (r->flags & IORESOURCE_UNSET)
> +			continue;
> +
> +		bar = r;
> +		break;
> +	}
> +
> +	if (!bar) {
> +		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
> +		return -ENXIO;
> +	}
> +
> +	pci_err(pdev, "Going to break: %pR\n", bar);
> +
> +	if (pdev->is_virtfn) {
> +#ifndef CONFIG_PCI_IOV
> +		return -ENXIO;
> +#else
> +		/*
> +		 * VFs don't have a per-function COMMAND register, so the best
> +		 * we can do is clear the Memory Space Enable bit in the PF's
> +		 * SRIOV control reg.
> +		 *
> +		 * Unfortunately, this requires that we have a PF (i.e doesn't
> +		 * work for a passed-through VF) and it has the potential side
> +		 * effect of also causing an EEH on every other VF under the
> +		 * PF. Oh well.
> +		 */
> +		pdev = pdev->physfn;
> +		if (!pdev)
> +			return -ENXIO; /* passed through VFs have no PF */
> +
> +		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
> +		pos += PCI_SRIOV_CTRL;
> +		bit  = PCI_SRIOV_CTRL_MSE;
> +#endif /* !CONFIG_PCI_IOV */
> +	} else {
> +		bit = PCI_COMMAND_MEMORY;
> +		pos = PCI_COMMAND;
> +	}
> +
> +	/*
> +	 * Process here is:
> +	 *
> +	 * 1. Disable Memory space.
> +	 *
> +	 * 2. Perform an MMIO to the device. This should result in an error
> +	 *    (CA  / UR) being raised by the device which results in an EEH
> +	 *    PE freeze. Using the in_8() accessor skips the eeh detection hook
> +	 *    so the freeze hook so the EEH Detection machinery won't be
> +	 *    triggered here. This is to match the usual behaviour of EEH
> +	 *    where the HW will asynchronously freeze a PE and it's up to
> +	 *    the kernel to notice and deal with it.
> +	 *
> +	 * 3. Turn Memory space back on. This is more important for VFs
> +	 *    since recovery will probably fail if we don't. For normal
> +	 *    the COMMAND register is reset as a part of re-initialising
> +	 *    the device.
> +	 *
> +	 * Breaking stuff is the point so who cares if it's racy ;)
> +	 */
> +	pci_read_config_word(pdev, pos, &old);
> +
> +	mapped = ioremap(bar->start, PAGE_SIZE);
> +	if (!mapped) {
> +		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
> +		return -ENXIO;
> +	}
> +
> +	pci_write_config_word(pdev, pos, old & ~bit);
> +	in_8(mapped);
> +	pci_write_config_word(pdev, pos, old);
> +
> +	iounmap(mapped);
> +
> +	return 0;
> +}
> +
> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
> +{
> +	return eeh_break_device(pdev);
> +}
> +

Why have an extra eeh_pe_inject_mmio_error() function which only calls
eeh_break_device()?

Maybe we can rename eeh_break_device() to eeh_mmio_break_device() and use
this function itself at both call sites?

-ritesh
Re: [PATCH] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block
Posted by Vaibhav Jain 2 months, 1 week ago
Hi Ritesh,

Thanks for looking into this patch. My responses your review inline
below:

Ritesh Harjani (IBM) <ritesh.list@gmail.com> writes:

> Narayana Murty N <nnmlinux@linux.ibm.com> writes:
>
>> Makes pseries_eeh_err_inject() available even when debugfs
>> is disabled (CONFIG_DEBUG_FS=n). It moves eeh_debugfs_break_device()
>> and eeh_pe_inject_mmio_error() out of the CONFIG_DEBUG_FS block
>> and renames it as eeh_break_device().
>>
>> Reported-by: kernel test robot <lkp@intel.com>
>> Closes: https://lore.kernel.org/oe-kbuild-all/202409170509.VWC6jadC-lkp@intel.com/
>> Fixes: b0e2b828dfca ("powerpc/pseries/eeh: Fix pseries_eeh_err_inject")
>> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
>> ---
>>  arch/powerpc/kernel/eeh.c | 198 +++++++++++++++++++-------------------
>>  1 file changed, 99 insertions(+), 99 deletions(-)
>
> Ok, so in your original patch you implemented eeh_inject ops for pseries
> using mmio based eeh error injection (eeh_pe_inject_mmio_error()), which
> uses the functions defined under debugfs -> eeh_debugfs_break_device(). 
>
> This was failing when CONFIG_DEBUGFS is not defined, thus referring to
> undefined function definition. 
>
> Minor nit below.
>
>>
>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>> index 49ab11a287a3..0fe25e907ea6 100644
>> --- a/arch/powerpc/kernel/eeh.c
>> +++ b/arch/powerpc/kernel/eeh.c
>> @@ -1574,6 +1574,104 @@ static int proc_eeh_show(struct seq_file *m, void *v)
>>  }
>>  #endif /* CONFIG_PROC_FS */
>>  
>> +static int eeh_break_device(struct pci_dev *pdev)
>> +{
>> +	struct resource *bar = NULL;
>> +	void __iomem *mapped;
>> +	u16 old, bit;
>> +	int i, pos;
>> +
>> +	/* Do we have an MMIO BAR to disable? */
>> +	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
>> +		struct resource *r = &pdev->resource[i];
>> +
>> +		if (!r->flags || !r->start)
>> +			continue;
>> +		if (r->flags & IORESOURCE_IO)
>> +			continue;
>> +		if (r->flags & IORESOURCE_UNSET)
>> +			continue;
>> +
>> +		bar = r;
>> +		break;
>> +	}
>> +
>> +	if (!bar) {
>> +		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
>> +		return -ENXIO;
>> +	}
>> +
>> +	pci_err(pdev, "Going to break: %pR\n", bar);
>> +
>> +	if (pdev->is_virtfn) {
>> +#ifndef CONFIG_PCI_IOV
>> +		return -ENXIO;
>> +#else
>> +		/*
>> +		 * VFs don't have a per-function COMMAND register, so the best
>> +		 * we can do is clear the Memory Space Enable bit in the PF's
>> +		 * SRIOV control reg.
>> +		 *
>> +		 * Unfortunately, this requires that we have a PF (i.e doesn't
>> +		 * work for a passed-through VF) and it has the potential side
>> +		 * effect of also causing an EEH on every other VF under the
>> +		 * PF. Oh well.
>> +		 */
>> +		pdev = pdev->physfn;
>> +		if (!pdev)
>> +			return -ENXIO; /* passed through VFs have no PF */
>> +
>> +		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
>> +		pos += PCI_SRIOV_CTRL;
>> +		bit  = PCI_SRIOV_CTRL_MSE;
>> +#endif /* !CONFIG_PCI_IOV */
>> +	} else {
>> +		bit = PCI_COMMAND_MEMORY;
>> +		pos = PCI_COMMAND;
>> +	}
>> +
>> +	/*
>> +	 * Process here is:
>> +	 *
>> +	 * 1. Disable Memory space.
>> +	 *
>> +	 * 2. Perform an MMIO to the device. This should result in an error
>> +	 *    (CA  / UR) being raised by the device which results in an EEH
>> +	 *    PE freeze. Using the in_8() accessor skips the eeh detection hook
>> +	 *    so the freeze hook so the EEH Detection machinery won't be
>> +	 *    triggered here. This is to match the usual behaviour of EEH
>> +	 *    where the HW will asynchronously freeze a PE and it's up to
>> +	 *    the kernel to notice and deal with it.
>> +	 *
>> +	 * 3. Turn Memory space back on. This is more important for VFs
>> +	 *    since recovery will probably fail if we don't. For normal
>> +	 *    the COMMAND register is reset as a part of re-initialising
>> +	 *    the device.
>> +	 *
>> +	 * Breaking stuff is the point so who cares if it's racy ;)
>> +	 */
>> +	pci_read_config_word(pdev, pos, &old);
>> +
>> +	mapped = ioremap(bar->start, PAGE_SIZE);
>> +	if (!mapped) {
>> +		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
>> +		return -ENXIO;
>> +	}
>> +
>> +	pci_write_config_word(pdev, pos, old & ~bit);
>> +	in_8(mapped);
>> +	pci_write_config_word(pdev, pos, old);
>> +
>> +	iounmap(mapped);
>> +
>> +	return 0;
>> +}
>> +
>> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
>> +{
>> +	return eeh_break_device(pdev);
>> +}
>> +
>
> Why have an extra eeh_pe_inject_mmio_error() function which only calls
> eeh_break_device()?
>
> Maybe we can rename eeh_break_device() to eeh_mmio_break_device() and use
> this function itself at both call sites?

Fair suggestion,

However we want to keep the method debugfs interface uses
to inject EEH (thats ppc platform agonistic), decoupled from what pseries
uses. Right now to support as initial work VFIO EEH injection on
pseries, we are piggy backing on eeh_debugfs_break_device().

This will change in future as we add more capabilities to pseries EEH
injection and this will change working of eeh_pe_inject_mmio_error()
without impacting the semantics of existing eeh_break_device().

-- 
Cheers
~ Vaibhav
Re: [PATCH] powerpc/pseries/eeh: move pseries_eeh_err_inject() outside CONFIG_DEBUG_FS block
Posted by Ritesh Harjani (IBM) 2 months, 1 week ago
Vaibhav Jain <vaibhav@linux.ibm.com> writes:

> Hi Ritesh,
>
> Thanks for looking into this patch. My responses your review inline
> below:
>
> Ritesh Harjani (IBM) <ritesh.list@gmail.com> writes:
>
>> Narayana Murty N <nnmlinux@linux.ibm.com> writes:
>>
>>> Makes pseries_eeh_err_inject() available even when debugfs
>>> is disabled (CONFIG_DEBUG_FS=n). It moves eeh_debugfs_break_device()
>>> and eeh_pe_inject_mmio_error() out of the CONFIG_DEBUG_FS block
>>> and renames it as eeh_break_device().
>>>
>>> Reported-by: kernel test robot <lkp@intel.com>
>>> Closes: https://lore.kernel.org/oe-kbuild-all/202409170509.VWC6jadC-lkp@intel.com/
>>> Fixes: b0e2b828dfca ("powerpc/pseries/eeh: Fix pseries_eeh_err_inject")
>>> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
>>> ---
>>>  arch/powerpc/kernel/eeh.c | 198 +++++++++++++++++++-------------------
>>>  1 file changed, 99 insertions(+), 99 deletions(-)
>>
>> Ok, so in your original patch you implemented eeh_inject ops for pseries
>> using mmio based eeh error injection (eeh_pe_inject_mmio_error()), which
>> uses the functions defined under debugfs -> eeh_debugfs_break_device(). 
>>
>> This was failing when CONFIG_DEBUGFS is not defined, thus referring to
>> undefined function definition. 
>>
>> Minor nit below.
>>
>>>
>>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>> index 49ab11a287a3..0fe25e907ea6 100644
>>> --- a/arch/powerpc/kernel/eeh.c
>>> +++ b/arch/powerpc/kernel/eeh.c
>>> @@ -1574,6 +1574,104 @@ static int proc_eeh_show(struct seq_file *m, void *v)
>>>  }
>>>  #endif /* CONFIG_PROC_FS */
>>>  
>>> +static int eeh_break_device(struct pci_dev *pdev)
>>> +{
>>> +	struct resource *bar = NULL;
>>> +	void __iomem *mapped;
>>> +	u16 old, bit;
>>> +	int i, pos;
>>> +
>>> +	/* Do we have an MMIO BAR to disable? */
>>> +	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
>>> +		struct resource *r = &pdev->resource[i];
>>> +
>>> +		if (!r->flags || !r->start)
>>> +			continue;
>>> +		if (r->flags & IORESOURCE_IO)
>>> +			continue;
>>> +		if (r->flags & IORESOURCE_UNSET)
>>> +			continue;
>>> +
>>> +		bar = r;
>>> +		break;
>>> +	}
>>> +
>>> +	if (!bar) {
>>> +		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
>>> +		return -ENXIO;
>>> +	}
>>> +
>>> +	pci_err(pdev, "Going to break: %pR\n", bar);
>>> +
>>> +	if (pdev->is_virtfn) {
>>> +#ifndef CONFIG_PCI_IOV
>>> +		return -ENXIO;
>>> +#else
>>> +		/*
>>> +		 * VFs don't have a per-function COMMAND register, so the best
>>> +		 * we can do is clear the Memory Space Enable bit in the PF's
>>> +		 * SRIOV control reg.
>>> +		 *
>>> +		 * Unfortunately, this requires that we have a PF (i.e doesn't
>>> +		 * work for a passed-through VF) and it has the potential side
>>> +		 * effect of also causing an EEH on every other VF under the
>>> +		 * PF. Oh well.
>>> +		 */
>>> +		pdev = pdev->physfn;
>>> +		if (!pdev)
>>> +			return -ENXIO; /* passed through VFs have no PF */
>>> +
>>> +		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
>>> +		pos += PCI_SRIOV_CTRL;
>>> +		bit  = PCI_SRIOV_CTRL_MSE;
>>> +#endif /* !CONFIG_PCI_IOV */
>>> +	} else {
>>> +		bit = PCI_COMMAND_MEMORY;
>>> +		pos = PCI_COMMAND;
>>> +	}
>>> +
>>> +	/*
>>> +	 * Process here is:
>>> +	 *
>>> +	 * 1. Disable Memory space.
>>> +	 *
>>> +	 * 2. Perform an MMIO to the device. This should result in an error
>>> +	 *    (CA  / UR) being raised by the device which results in an EEH
>>> +	 *    PE freeze. Using the in_8() accessor skips the eeh detection hook
>>> +	 *    so the freeze hook so the EEH Detection machinery won't be
>>> +	 *    triggered here. This is to match the usual behaviour of EEH
>>> +	 *    where the HW will asynchronously freeze a PE and it's up to
>>> +	 *    the kernel to notice and deal with it.
>>> +	 *
>>> +	 * 3. Turn Memory space back on. This is more important for VFs
>>> +	 *    since recovery will probably fail if we don't. For normal
>>> +	 *    the COMMAND register is reset as a part of re-initialising
>>> +	 *    the device.
>>> +	 *
>>> +	 * Breaking stuff is the point so who cares if it's racy ;)
>>> +	 */
>>> +	pci_read_config_word(pdev, pos, &old);
>>> +
>>> +	mapped = ioremap(bar->start, PAGE_SIZE);
>>> +	if (!mapped) {
>>> +		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
>>> +		return -ENXIO;
>>> +	}
>>> +
>>> +	pci_write_config_word(pdev, pos, old & ~bit);
>>> +	in_8(mapped);
>>> +	pci_write_config_word(pdev, pos, old);
>>> +
>>> +	iounmap(mapped);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
>>> +{
>>> +	return eeh_break_device(pdev);
>>> +}
>>> +
>>
>> Why have an extra eeh_pe_inject_mmio_error() function which only calls
>> eeh_break_device()?
>>
>> Maybe we can rename eeh_break_device() to eeh_mmio_break_device() and use
>> this function itself at both call sites?
>
> Fair suggestion,
>
> However we want to keep the method debugfs interface uses
> to inject EEH (thats ppc platform agonistic), decoupled from what pseries
> uses. Right now to support as initial work VFIO EEH injection on
> pseries, we are piggy backing on eeh_debugfs_break_device().

Right. 

>
> This will change in future as we add more capabilities to pseries EEH
> injection and this will change working of eeh_pe_inject_mmio_error()
> without impacting the semantics of existing eeh_break_device().

Thanks Vaibhav for the context. 

The debugfs interface "eeh_break_device()" is defined here in
"arch/powerpc/kernel/eeh.c". Those "future pseries changes" could remain
in arch/powerpc/platforms/pseries/eeh_pseries.c using the generic
functions defined from <>/kernel/eeh.c, right. And today
eeh_pe_inject_mmio_error() has nothing pseries specific anyway.

But I get it that this is a minor compile fix for the patch that has
already landed in 6.12 now. As I said earlier too, this was just a minor
nit.

Maybe we could get rid of this redundant function later when we add
pseries specific capabilities (if we still find this extra function has
no use).

So - 

Please feel free to add - 
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>