diff mbox series

fast-reboot: move pci_reset error handling into fast-reboot code

Message ID 20180202054624.18552-1-npiggin@gmail.com
State Accepted
Headers show
Series fast-reboot: move pci_reset error handling into fast-reboot code | expand

Commit Message

Nicholas Piggin Feb. 2, 2018, 5:46 a.m. UTC
pci_reset() currently does a platform reboot if it fails. It
should not know about fast-reboot at this level, so instead have
it return an error, and the fast reboot caller will do the
platform reboot.

The code essentially does the same thing, but flexibility is
improved. Ideally the fast reboot code should perform pci_reset
and all such fail-able operations before the CPU resets itself
and destroys its own stack. That's not the case now, but that
should be the goal.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 core/fast-reboot.c | 13 ++++++++++++-
 core/pci.c         | 11 +++++------
 include/pci.h      |  2 +-
 3 files changed, 18 insertions(+), 8 deletions(-)

Comments

Vaidyanathan Srinivasan Feb. 2, 2018, 6:05 p.m. UTC | #1
* Nicholas Piggin <npiggin@gmail.com> [2018-02-02 15:46:24]:

> pci_reset() currently does a platform reboot if it fails. It
> should not know about fast-reboot at this level, so instead have
> it return an error, and the fast reboot caller will do the
> platform reboot.
> 
> The code essentially does the same thing, but flexibility is
> improved. Ideally the fast reboot code should perform pci_reset
> and all such fail-able operations before the CPU resets itself
> and destroys its own stack. That's not the case now, but that
> should be the goal.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>

> ---
>  core/fast-reboot.c | 13 ++++++++++++-
>  core/pci.c         | 11 +++++------
>  include/pci.h      |  2 +-
>  3 files changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git a/core/fast-reboot.c b/core/fast-reboot.c
> index 1c76c0891..382e781ae 100644
> --- a/core/fast-reboot.c
> +++ b/core/fast-reboot.c
> @@ -348,7 +348,18 @@ void __noreturn fast_reboot_entry(void)
>  	}
>  
>  	/* Remove all PCI devices */
> -	pci_reset();
> +	if (pci_reset()) {
> +		prlog(PR_NOTICE, "RESET: Fast reboot failed to reset PCI\n");
> +
> +		/*
> +		 * Can't return to caller here because we're past no-return.
> +		 * Attempt an IPL here which is what the caller would do.
> +		 */
> +		if (platform.cec_reboot)
> +			platform.cec_reboot();
> +		for (;;)
> +			;
> +	}

The code change improves error handling.


>  
>  	ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
>  
> diff --git a/core/pci.c b/core/pci.c
> index 0809521f8..494a33a45 100644
> --- a/core/pci.c
> +++ b/core/pci.c
> @@ -1668,7 +1668,7 @@ static void __pci_reset(struct list_head *list)
>  	}
>  }
>  
> -void pci_reset(void)
> +int64_t pci_reset(void)
>  {
>  	unsigned int i;
>  	struct pci_slot *slot;
> @@ -1695,11 +1695,9 @@ void pci_reset(void)
>  				rc = slot->ops.run_sm(slot);
>  			}
>  			if (rc < 0) {
> -				PCIERR(phb, 0, "Complete reset failed, aborting"
> -				               "fast reboot (rc=%lld)\n", rc);
> -				if (platform.cec_reboot)
> -					platform.cec_reboot();
> -				while (true) {}
> +				PCIERR(phb, 0, "Complete reset failed "
> +				               "(rc=%lld)\n", rc);
> +				return rc;

However...

If we abort at the PCI link that fails and return at this point, we
are giving up too early. If subsequent links can get re-enabled then
fast-reboot can be made to work.

[  141.208697990,7] PHB#0003[0:3]: LINK: Link is stable
[  141.208763671,7] PHB#0003[0:3]: LINK: Card [15b3:1013] Degraded Retry:disabled
[  141.208832155,7] PHB#0003[0:3]: LINK: Speed Train:GEN3 PHB:GEN4 DEV:GEN3
[  141.208872220,7] PHB#0003[0:3]: LINK: Width Train:x08 PHB:x16 DEV:x16 *
[  141.214051722,3] PCI-SLOT-0000000000000003 Invalid state 00000000
[  141.215181558,3] PHB#0003:00:00.0 Complete reset failed (rc=-6)
[  141.216418848,5] RESET: Fast reboot failed to reset PCI
[  141.217638534,6] IPMI: sending chassis control request 0x03
[  141.217676559,6] BT: seq 0x1c netfn 0x00 cmd 0x02: Message sent to host
[  141.482616079,7] MBOX-FLASH: Adjusting the window
[  141.482676812,7] LPC-MBOX: Sending BMC interrupt
[  142.246839786,7] MBOX-FLASH: Adjusting the window
[  142.247414703,7] LPC-MBOX: Sending BMC interrupt
[  142.349862574,6] BT: seq 0x1c netfn 0x00 cmd 0x02: IPMI MSG done
[  143.015709854,6] STB: BOOTKERNEL verified
[  143.015768123,7] LPC-MBOX: Sending BMC interrupt
[  143.118218291,7] blocklevel_read: 0x0        0x31c03b48      0x30
[  143.118283317,7] blocklevel_raw_read: 0x0    0x31c03b48      0x30
[  143.118352801,7] MBOX-FLASH: Adjusting the window
[  143.118382662,7] LPC-MBOX: Sending BMC interrupt
[  143.220835377,7] FFS: Partition map size: 0x1000
[  143.221135902,7] blocklevel_read: 0x0        0x30a3f218      0x1000
[  143.221166425,7] blocklevel_raw_read: 0x0    0x30a3f218      0x1000
[  143.223081918,7] FLASH: No ROOTFS partition

On one of the failing system where one of the PHB link does not train,
I am able to skip by continuing this loop and successfully complete
the fast-reset.

In this specific case there are no devices attached to that PHB link.
We need a way to understand why the link reset fails and skip it if
this is not due to failing hardware and proceed with reset/train of
all other links and complete the loop.

diff --git a/core/pci.c b/core/pci.c
index 0809521f..f708b1a0 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -1695,11 +1695,8 @@ void pci_reset(void)
                                rc = slot->ops.run_sm(slot);
                        }
                        if (rc < 0) {
-                               PCIERR(phb, 0, "Complete reset failed, aborting"
+                               PCIERR(phb, 0, "Complete reset failed, CONTINUING "
                                               "fast reboot (rc=%lld)\n", rc);
-                               if (platform.cec_reboot)
-                                       platform.cec_reboot();
-                               while (true) {}
                        }

The above hack makes fast-reboot work well on one of the test system.
We need a way to avoid treating all PCI reset fails as fatal and fallback
to full IPL.

I am using fast-reboot for runtime PState (OCC) reload and related
tests and hence making fast-reboot work is less than ideal scenarios
will be very helpful :)

--Vaidy
Nicholas Piggin Feb. 3, 2018, 5:36 a.m. UTC | #2
On Fri, 2 Feb 2018 23:35:05 +0530
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> wrote:

> * Nicholas Piggin <npiggin@gmail.com> [2018-02-02 15:46:24]:
> 
> > pci_reset() currently does a platform reboot if it fails. It
> > should not know about fast-reboot at this level, so instead have
> > it return an error, and the fast reboot caller will do the
> > platform reboot.
> > 
> > The code essentially does the same thing, but flexibility is
> > improved. Ideally the fast reboot code should perform pci_reset
> > and all such fail-able operations before the CPU resets itself
> > and destroys its own stack. That's not the case now, but that
> > should be the goal.
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>  
> 
> Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> 
> > ---
> >  core/fast-reboot.c | 13 ++++++++++++-
> >  core/pci.c         | 11 +++++------
> >  include/pci.h      |  2 +-
> >  3 files changed, 18 insertions(+), 8 deletions(-)
> > 
> > diff --git a/core/fast-reboot.c b/core/fast-reboot.c
> > index 1c76c0891..382e781ae 100644
> > --- a/core/fast-reboot.c
> > +++ b/core/fast-reboot.c
> > @@ -348,7 +348,18 @@ void __noreturn fast_reboot_entry(void)
> >  	}
> >  
> >  	/* Remove all PCI devices */
> > -	pci_reset();
> > +	if (pci_reset()) {
> > +		prlog(PR_NOTICE, "RESET: Fast reboot failed to reset PCI\n");
> > +
> > +		/*
> > +		 * Can't return to caller here because we're past no-return.
> > +		 * Attempt an IPL here which is what the caller would do.
> > +		 */
> > +		if (platform.cec_reboot)
> > +			platform.cec_reboot();
> > +		for (;;)
> > +			;
> > +	}  
> 
> The code change improves error handling.
> 
> 
> >  
> >  	ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
> >  
> > diff --git a/core/pci.c b/core/pci.c
> > index 0809521f8..494a33a45 100644
> > --- a/core/pci.c
> > +++ b/core/pci.c
> > @@ -1668,7 +1668,7 @@ static void __pci_reset(struct list_head *list)
> >  	}
> >  }
> >  
> > -void pci_reset(void)
> > +int64_t pci_reset(void)
> >  {
> >  	unsigned int i;
> >  	struct pci_slot *slot;
> > @@ -1695,11 +1695,9 @@ void pci_reset(void)
> >  				rc = slot->ops.run_sm(slot);
> >  			}
> >  			if (rc < 0) {
> > -				PCIERR(phb, 0, "Complete reset failed, aborting"
> > -				               "fast reboot (rc=%lld)\n", rc);
> > -				if (platform.cec_reboot)
> > -					platform.cec_reboot();
> > -				while (true) {}
> > +				PCIERR(phb, 0, "Complete reset failed "
> > +				               "(rc=%lld)\n", rc);
> > +				return rc;  
> 
> However...
> 
> If we abort at the PCI link that fails and return at this point, we
> are giving up too early. If subsequent links can get re-enabled then
> fast-reboot can be made to work.
> 
> [  141.208697990,7] PHB#0003[0:3]: LINK: Link is stable
> [  141.208763671,7] PHB#0003[0:3]: LINK: Card [15b3:1013] Degraded Retry:disabled
> [  141.208832155,7] PHB#0003[0:3]: LINK: Speed Train:GEN3 PHB:GEN4 DEV:GEN3
> [  141.208872220,7] PHB#0003[0:3]: LINK: Width Train:x08 PHB:x16 DEV:x16 *
> [  141.214051722,3] PCI-SLOT-0000000000000003 Invalid state 00000000
> [  141.215181558,3] PHB#0003:00:00.0 Complete reset failed (rc=-6)
> [  141.216418848,5] RESET: Fast reboot failed to reset PCI
> [  141.217638534,6] IPMI: sending chassis control request 0x03
> [  141.217676559,6] BT: seq 0x1c netfn 0x00 cmd 0x02: Message sent to host
> [  141.482616079,7] MBOX-FLASH: Adjusting the window
> [  141.482676812,7] LPC-MBOX: Sending BMC interrupt
> [  142.246839786,7] MBOX-FLASH: Adjusting the window
> [  142.247414703,7] LPC-MBOX: Sending BMC interrupt
> [  142.349862574,6] BT: seq 0x1c netfn 0x00 cmd 0x02: IPMI MSG done
> [  143.015709854,6] STB: BOOTKERNEL verified
> [  143.015768123,7] LPC-MBOX: Sending BMC interrupt
> [  143.118218291,7] blocklevel_read: 0x0        0x31c03b48      0x30
> [  143.118283317,7] blocklevel_raw_read: 0x0    0x31c03b48      0x30
> [  143.118352801,7] MBOX-FLASH: Adjusting the window
> [  143.118382662,7] LPC-MBOX: Sending BMC interrupt
> [  143.220835377,7] FFS: Partition map size: 0x1000
> [  143.221135902,7] blocklevel_read: 0x0        0x30a3f218      0x1000
> [  143.221166425,7] blocklevel_raw_read: 0x0    0x30a3f218      0x1000
> [  143.223081918,7] FLASH: No ROOTFS partition
> 
> On one of the failing system where one of the PHB link does not train,
> I am able to skip by continuing this loop and successfully complete
> the fast-reset.
> 
> In this specific case there are no devices attached to that PHB link.
> We need a way to understand why the link reset fails and skip it if
> this is not due to failing hardware and proceed with reset/train of
> all other links and complete the loop.

Yes, we need to get someone to understand what is happening with the
PCI device.

Did you get to the bottom of why we're getting the state machine into
an invalid state there? Presumably that should not happen even if we
are getting errors back from the hardware it should not go into unknown
state?

> 
> diff --git a/core/pci.c b/core/pci.c
> index 0809521f..f708b1a0 100644
> --- a/core/pci.c
> +++ b/core/pci.c
> @@ -1695,11 +1695,8 @@ void pci_reset(void)
>                                 rc = slot->ops.run_sm(slot);
>                         }
>                         if (rc < 0) {
> -                               PCIERR(phb, 0, "Complete reset failed, aborting"
> +                               PCIERR(phb, 0, "Complete reset failed, CONTINUING "
>                                                "fast reboot (rc=%lld)\n", rc);
> -                               if (platform.cec_reboot)
> -                                       platform.cec_reboot();
> -                               while (true) {}
>                         }
> 
> The above hack makes fast-reboot work well on one of the test system.
> We need a way to avoid treating all PCI reset fails as fatal and fallback
> to full IPL.
> 
> I am using fast-reboot for runtime PState (OCC) reload and related
> tests and hence making fast-reboot work is less than ideal scenarios
> will be very helpful :)

Of course for production systems, default always needs to err on the side of
caution and IPL if we hit some condition we don't understand. But I don't
think it would be a problem to add an nvram config as an unsupported option
to ignore as many errors as possible in fast reboot.

Are you just using fast reboot to speed up testing cycles?

Thanks,
Nick
Vaidyanathan Srinivasan Feb. 3, 2018, 6:16 a.m. UTC | #3
* Nicholas Piggin <npiggin@gmail.com> [2018-02-03 15:36:19]:

> On Fri, 2 Feb 2018 23:35:05 +0530
> Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> wrote:
> 
> > * Nicholas Piggin <npiggin@gmail.com> [2018-02-02 15:46:24]:
> > 
> > > pci_reset() currently does a platform reboot if it fails. It
> > > should not know about fast-reboot at this level, so instead have
> > > it return an error, and the fast reboot caller will do the
> > > platform reboot.
> > > 
> > > The code essentially does the same thing, but flexibility is
> > > improved. Ideally the fast reboot code should perform pci_reset
> > > and all such fail-able operations before the CPU resets itself
> > > and destroys its own stack. That's not the case now, but that
> > > should be the goal.
> > > 
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>  
> > 
> > Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> > 
> > > ---
> > >  core/fast-reboot.c | 13 ++++++++++++-
> > >  core/pci.c         | 11 +++++------
> > >  include/pci.h      |  2 +-
> > >  3 files changed, 18 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/core/fast-reboot.c b/core/fast-reboot.c
> > > index 1c76c0891..382e781ae 100644
> > > --- a/core/fast-reboot.c
> > > +++ b/core/fast-reboot.c
> > > @@ -348,7 +348,18 @@ void __noreturn fast_reboot_entry(void)
> > >  	}
> > >  
> > >  	/* Remove all PCI devices */
> > > -	pci_reset();
> > > +	if (pci_reset()) {
> > > +		prlog(PR_NOTICE, "RESET: Fast reboot failed to reset PCI\n");
> > > +
> > > +		/*
> > > +		 * Can't return to caller here because we're past no-return.
> > > +		 * Attempt an IPL here which is what the caller would do.
> > > +		 */
> > > +		if (platform.cec_reboot)
> > > +			platform.cec_reboot();
> > > +		for (;;)
> > > +			;
> > > +	}  
> > 
> > The code change improves error handling.
> > 
> > 
> > >  
> > >  	ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
> > >  
> > > diff --git a/core/pci.c b/core/pci.c
> > > index 0809521f8..494a33a45 100644
> > > --- a/core/pci.c
> > > +++ b/core/pci.c
> > > @@ -1668,7 +1668,7 @@ static void __pci_reset(struct list_head *list)
> > >  	}
> > >  }
> > >  
> > > -void pci_reset(void)
> > > +int64_t pci_reset(void)
> > >  {
> > >  	unsigned int i;
> > >  	struct pci_slot *slot;
> > > @@ -1695,11 +1695,9 @@ void pci_reset(void)
> > >  				rc = slot->ops.run_sm(slot);
> > >  			}
> > >  			if (rc < 0) {
> > > -				PCIERR(phb, 0, "Complete reset failed, aborting"
> > > -				               "fast reboot (rc=%lld)\n", rc);
> > > -				if (platform.cec_reboot)
> > > -					platform.cec_reboot();
> > > -				while (true) {}
> > > +				PCIERR(phb, 0, "Complete reset failed "
> > > +				               "(rc=%lld)\n", rc);
> > > +				return rc;  
> > 
> > However...
> > 
> > If we abort at the PCI link that fails and return at this point, we
> > are giving up too early. If subsequent links can get re-enabled then
> > fast-reboot can be made to work.
> > 
> > [  141.208697990,7] PHB#0003[0:3]: LINK: Link is stable
> > [  141.208763671,7] PHB#0003[0:3]: LINK: Card [15b3:1013] Degraded Retry:disabled
> > [  141.208832155,7] PHB#0003[0:3]: LINK: Speed Train:GEN3 PHB:GEN4 DEV:GEN3
> > [  141.208872220,7] PHB#0003[0:3]: LINK: Width Train:x08 PHB:x16 DEV:x16 *
> > [  141.214051722,3] PCI-SLOT-0000000000000003 Invalid state 00000000
> > [  141.215181558,3] PHB#0003:00:00.0 Complete reset failed (rc=-6)
> > [  141.216418848,5] RESET: Fast reboot failed to reset PCI
> > [  141.217638534,6] IPMI: sending chassis control request 0x03
> > [  141.217676559,6] BT: seq 0x1c netfn 0x00 cmd 0x02: Message sent to host
> > [  141.482616079,7] MBOX-FLASH: Adjusting the window
> > [  141.482676812,7] LPC-MBOX: Sending BMC interrupt
> > [  142.246839786,7] MBOX-FLASH: Adjusting the window
> > [  142.247414703,7] LPC-MBOX: Sending BMC interrupt
> > [  142.349862574,6] BT: seq 0x1c netfn 0x00 cmd 0x02: IPMI MSG done
> > [  143.015709854,6] STB: BOOTKERNEL verified
> > [  143.015768123,7] LPC-MBOX: Sending BMC interrupt
> > [  143.118218291,7] blocklevel_read: 0x0        0x31c03b48      0x30
> > [  143.118283317,7] blocklevel_raw_read: 0x0    0x31c03b48      0x30
> > [  143.118352801,7] MBOX-FLASH: Adjusting the window
> > [  143.118382662,7] LPC-MBOX: Sending BMC interrupt
> > [  143.220835377,7] FFS: Partition map size: 0x1000
> > [  143.221135902,7] blocklevel_read: 0x0        0x30a3f218      0x1000
> > [  143.221166425,7] blocklevel_raw_read: 0x0    0x30a3f218      0x1000
> > [  143.223081918,7] FLASH: No ROOTFS partition
> > 
> > On one of the failing system where one of the PHB link does not train,
> > I am able to skip by continuing this loop and successfully complete
> > the fast-reset.
> > 
> > In this specific case there are no devices attached to that PHB link.
> > We need a way to understand why the link reset fails and skip it if
> > this is not due to failing hardware and proceed with reset/train of
> > all other links and complete the loop.
> 
> Yes, we need to get someone to understand what is happening with the
> PCI device.
> 
> Did you get to the bottom of why we're getting the state machine into
> an invalid state there? Presumably that should not happen even if we
> are getting errors back from the hardware it should not go into unknown
> state?

Not yet, I have two PHB bus/slot that gets us to error our at
different stages.  I do not yet understand the state machine.  Let me
get so more test runs and try to understand and report.
 
> > 
> > diff --git a/core/pci.c b/core/pci.c
> > index 0809521f..f708b1a0 100644
> > --- a/core/pci.c
> > +++ b/core/pci.c
> > @@ -1695,11 +1695,8 @@ void pci_reset(void)
> >                                 rc = slot->ops.run_sm(slot);
> >                         }
> >                         if (rc < 0) {
> > -                               PCIERR(phb, 0, "Complete reset failed, aborting"
> > +                               PCIERR(phb, 0, "Complete reset failed, CONTINUING "
> >                                                "fast reboot (rc=%lld)\n", rc);
> > -                               if (platform.cec_reboot)
> > -                                       platform.cec_reboot();
> > -                               while (true) {}
> >                         }
> > 
> > The above hack makes fast-reboot work well on one of the test system.
> > We need a way to avoid treating all PCI reset fails as fatal and fallback
> > to full IPL.
> > 
> > I am using fast-reboot for runtime PState (OCC) reload and related
> > tests and hence making fast-reboot work is less than ideal scenarios
> > will be very helpful :)
> 
> Of course for production systems, default always needs to err on the side of
> caution and IPL if we hit some condition we don't understand. But I don't
> think it would be a problem to add an nvram config as an unsupported option
> to ignore as many errors as possible in fast reboot.

Good idea.  We can add a nvram setting to continue-on-error. Or
override the "experimental-fast-reset=feeling-lucky" with something
risky :)

experimental-fast-reset=feeling-adventurous ?

> Are you just using fast reboot to speed up testing cycles?

The use case is as follows: we can provide runtime parameter to OCC
for system test purpose.  OCC needs a soft reboot to make the settings
take effect and init various PState tables.  OPAL+Linux need to
re-read this and this is where fast-reboot helps.

When we fallback to full IPL, OCC is reloaded by hostboot and loses
full context and goes back to power-on settings and hence blows away
the runtime test parameters.  We are indeed building a chain of
soft-reboot at OCC level and then at OPAL+Linux level to test runtime
parameters to OCC and drive those from Linux using tools. 

--Vaidy
Stewart Smith Feb. 9, 2018, 5:07 a.m. UTC | #4
Nicholas Piggin <npiggin@gmail.com> writes:
> pci_reset() currently does a platform reboot if it fails. It
> should not know about fast-reboot at this level, so instead have
> it return an error, and the fast reboot caller will do the
> platform reboot.
>
> The code essentially does the same thing, but flexibility is
> improved. Ideally the fast reboot code should perform pci_reset
> and all such fail-able operations before the CPU resets itself
> and destroys its own stack. That's not the case now, but that
> should be the goal.

I like it!

Merged to master as of d2cc3957abef48f3afdf8ec61d69e87e3446d562

HOpefully we can figure out the remaining weirdness that Vaidy mentioned
and fix it up.
diff mbox series

Patch

diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 1c76c0891..382e781ae 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -348,7 +348,18 @@  void __noreturn fast_reboot_entry(void)
 	}
 
 	/* Remove all PCI devices */
-	pci_reset();
+	if (pci_reset()) {
+		prlog(PR_NOTICE, "RESET: Fast reboot failed to reset PCI\n");
+
+		/*
+		 * Can't return to caller here because we're past no-return.
+		 * Attempt an IPL here which is what the caller would do.
+		 */
+		if (platform.cec_reboot)
+			platform.cec_reboot();
+		for (;;)
+			;
+	}
 
 	ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
 
diff --git a/core/pci.c b/core/pci.c
index 0809521f8..494a33a45 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -1668,7 +1668,7 @@  static void __pci_reset(struct list_head *list)
 	}
 }
 
-void pci_reset(void)
+int64_t pci_reset(void)
 {
 	unsigned int i;
 	struct pci_slot *slot;
@@ -1695,11 +1695,9 @@  void pci_reset(void)
 				rc = slot->ops.run_sm(slot);
 			}
 			if (rc < 0) {
-				PCIERR(phb, 0, "Complete reset failed, aborting"
-				               "fast reboot (rc=%lld)\n", rc);
-				if (platform.cec_reboot)
-					platform.cec_reboot();
-				while (true) {}
+				PCIERR(phb, 0, "Complete reset failed "
+				               "(rc=%lld)\n", rc);
+				return rc;
 			}
 		}
 
@@ -1710,6 +1708,7 @@  void pci_reset(void)
 	/* Re-Initialize all discovered PCI slots */
 	pci_init_slots();
 
+	return 0;
 }
 
 static void pci_do_jobs(void (*fn)(void *))
diff --git a/include/pci.h b/include/pci.h
index c085b6b86..508ebf45f 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -475,7 +475,7 @@  extern void pci_std_swizzle_irq_map(struct dt_node *dt_node,
 
 /* Initialize all PCI slots */
 extern void pci_init_slots(void);
-extern void pci_reset(void);
+extern int64_t pci_reset(void);
 
 extern void opal_pci_eeh_set_evt(uint64_t phb_id);
 extern void opal_pci_eeh_clear_evt(uint64_t phb_id);