diff mbox series

opal: prd_info: Add resilience to service check

Message ID 1523110237-25133-1-git-send-email-debmc@linux.vnet.ibm.com
State Accepted
Headers show
Series opal: prd_info: Add resilience to service check | expand

Commit Message

Deb McLemore April 7, 2018, 2:10 p.m. UTC
When the opal-prd.service is running and an attempt to stop it is
performed, ignore the exit status and continue.

Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
---
 src/opal/prd_info.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

Comments

Vasant Hegde April 7, 2018, 2:23 p.m. UTC | #1
On 04/07/2018 07:40 PM, Deb McLemore wrote:
> When the opal-prd.service is running and attempt to stop is
> performed, ignore the exit status and continue.

Deb,

Can you please explain why you want to ignore the exit status here?
Are there any issues?

-Vasant



> 
> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
> ---
>   src/opal/prd_info.c | 20 ++++----------------
>   1 file changed, 4 insertions(+), 16 deletions(-)
> 
> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
> index 4082a18..2db9413 100644
> --- a/src/opal/prd_info.c
> +++ b/src/opal/prd_info.c
> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>   
>   static int prd_service_check(fwts_framework *fw, int *restart)
>   {
> -	int rc = FWTS_OK, status = 0, stop_status = 0;
> +	int rc = FWTS_OK, status = 0;
>   	char *command;
>   	char *output = NULL;
>   
> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework *fw, int *restart)
>   		goto out;
>   	case 0: /* "running" */
>   		command = "systemctl stop opal-prd.service 2>&1";
> -		stop_status = fwts_exec2(command, &output);
> +		fwts_exec2(command, &output);
>   
>   		if (output)
>   			free(output);
>   
> -		switch (stop_status) {
> -		case 0:
> -                        *restart = 1;
> -                        break;
> -		default:
> -                        fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
> -                                "Attempt was made to stop the "
> -                                "opal-prd.service but was not "
> -                                "successful. Try to "
> -                                "\"sudo systemctl stop "
> -                                "opal-prd.service\" and retry.");
> -                        rc = FWTS_ERROR;
> -                        goto out;
> -		}
> +		*restart = 1;
> +		break;
>   	default:
>   		break;
>   	}
>
Deborah McLemore April 7, 2018, 3:20 p.m. UTC | #2
We are getting -1 back. What is the expected exit status from
"systemctl stop"?

Sent from my iPhone

> On Apr 7, 2018, at 9:23 AM, Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
wrote:
>
>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>> When the opal-prd.service is running and attempt to stop is
>> performed, ignore the exit status and continue.
>
> Deb,
>
> Can you please explain why do you want to ignore exit status here?
> Is there any issues?
>
> -Vasant
>
>
>
>>
>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>> ---
>>  src/opal/prd_info.c | 20 ++++----------------
>>  1 file changed, 4 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>> index 4082a18..2db9413 100644
>> --- a/src/opal/prd_info.c
>> +++ b/src/opal/prd_info.c
>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>>
>>  static int prd_service_check(fwts_framework *fw, int *restart)
>>  {
>> -    int rc = FWTS_OK, status = 0, stop_status = 0;
>> +    int rc = FWTS_OK, status = 0;
>>      char *command;
>>      char *output = NULL;
>>
>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework *fw, int
*restart)
>>          goto out;
>>      case 0: /* "running" */
>>          command = "systemctl stop opal-prd.service 2>&1";
>> -        stop_status = fwts_exec2(command, &output);
>> +        fwts_exec2(command, &output);
>>
>>          if (output)
>>              free(output);
>>
>> -        switch (stop_status) {
>> -        case 0:
>> -                        *restart = 1;
>> -                        break;
>> -        default:
>> -                        fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD
Info",
>> -                                "Attempt was made to stop the "
>> -                                "opal-prd.service but was not "
>> -                                "successful. Try to "
>> -                                "\"sudo systemctl stop "
>> -                                "opal-prd.service\" and retry.");
>> -                        rc = FWTS_ERROR;
>> -                        goto out;
>> -        }
>> +        *restart = 1;
>> +        break;
>>      default:
>>          break;
>>      }
>>
>
>
> --
> fwts-devel mailing list
> fwts-devel@lists.ubuntu.com
> Modify settings or unsubscribe at:
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=

>
<html><body><p><font size="2">We are getting -1 back, what is the expected exit status from systemd stop ?<br></font><font size="2"><br></font><font size="2">Sent from my iPhone<br></font><font size="2"><br></font><font size="2">&gt; On Apr 7, 2018, at 9:23 AM, Vasant Hegde &lt;hegdevasant@linux.vnet.ibm.com&gt; wrote:<br></font><font size="2">&gt; <br></font><font size="2">&gt;&gt; On 04/07/2018 07:40 PM, Deb McLemore wrote:<br></font><font size="2">&gt;&gt; When the opal-prd.service is running and attempt to stop is<br></font><font size="2">&gt;&gt; performed, ignore the exit status and continue.<br></font><font size="2">&gt; <br></font><font size="2">&gt; Deb,<br></font><font size="2">&gt; <br></font><font size="2">&gt; Can you please explain why do you want to ignore exit status here?<br></font><font size="2">&gt; Is there any issues?<br></font><font size="2">&gt; <br></font><font size="2">&gt; -Vasant<br></font><font size="2">&gt; <br></font><font size="2">&gt; <br></font><font size="2">&gt; <br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt;&gt; Signed-off-by: Deb McLemore &lt;debmc@linux.vnet.ibm.com&gt;<br></font><font size="2">&gt;&gt; ---<br></font><font size="2">&gt;&gt;  src/opal/prd_info.c | 20 ++++----------------<br></font><font size="2">&gt;&gt;  1 file changed, 4 insertions(+), 16 deletions(-)<br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt;&gt; diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c<br></font><font size="2">&gt;&gt; index 4082a18..2db9413 100644<br></font><font size="2">&gt;&gt; --- a/src/opal/prd_info.c<br></font><font size="2">&gt;&gt; +++ b/src/opal/prd_info.c<br></font><font size="2">&gt;&gt; @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)<br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt;&gt;  static int prd_service_check(fwts_framework *fw, int *restart)<br></font><font size="2">&gt;&gt;  {<br></font><font size="2">&gt;&gt; -    int rc = FWTS_OK, status = 0, 
stop_status = 0;<br></font><font size="2">&gt;&gt; +    int rc = FWTS_OK, status = 0;<br></font><font size="2">&gt;&gt;      char *command;<br></font><font size="2">&gt;&gt;      char *output = NULL;<br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt;&gt; @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework *fw, int *restart)<br></font><font size="2">&gt;&gt;          goto out;<br></font><font size="2">&gt;&gt;      case 0: /* &quot;running&quot; */<br></font><font size="2">&gt;&gt;          command = &quot;systemctl stop opal-prd.service 2&gt;&amp;1&quot;;<br></font><font size="2">&gt;&gt; -        stop_status = fwts_exec2(command, &amp;output);<br></font><font size="2">&gt;&gt; +        fwts_exec2(command, &amp;output);<br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt;&gt;          if (output)<br></font><font size="2">&gt;&gt;              free(output);<br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt;&gt; -        switch (stop_status) {<br></font><font size="2">&gt;&gt; -        case 0:<br></font><font size="2">&gt;&gt; -                        *restart = 1;<br></font><font size="2">&gt;&gt; -                        break;<br></font><font size="2">&gt;&gt; -        default:<br></font><font size="2">&gt;&gt; -                        fwts_failed(fw, LOG_LEVEL_HIGH, &quot;OPAL PRD Info&quot;,<br></font><font size="2">&gt;&gt; -                                &quot;Attempt was made to stop the &quot;<br></font><font size="2">&gt;&gt; -                                &quot;opal-prd.service but was not &quot;<br></font><font size="2">&gt;&gt; -                                &quot;successful. 
Try to &quot;<br></font><font size="2">&gt;&gt; -                                &quot;\&quot;sudo systemctl stop &quot;<br></font><font size="2">&gt;&gt; -                                &quot;opal-prd.service\&quot; and retry.&quot;);<br></font><font size="2">&gt;&gt; -                        rc = FWTS_ERROR;<br></font><font size="2">&gt;&gt; -                        goto out;<br></font><font size="2">&gt;&gt; -        }<br></font><font size="2">&gt;&gt; +        *restart = 1;<br></font><font size="2">&gt;&gt; +        break;<br></font><font size="2">&gt;&gt;      default:<br></font><font size="2">&gt;&gt;          break;<br></font><font size="2">&gt;&gt;      }<br></font><font size="2">&gt;&gt; <br></font><font size="2">&gt; <br></font><font size="2">&gt; <br></font><font size="2">&gt; -- <br></font><font size="2">&gt; fwts-devel mailing list<br></font><font size="2">&gt; fwts-devel@lists.ubuntu.com<br></font><font size="2">&gt; Modify settings or unsubscribe at: <a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=">https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&amp;d=DwIGaQ&amp;c=jf_iaSHvJObTbx-siA1ZOg&amp;r=V3KRDPsp3yMosW9R4elWYg&amp;m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&amp;s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&amp;e=</a><br></font><font size="2">&gt; <br></font><BR>
</body></html>
ppaidipe April 7, 2018, 6:17 p.m. UTC | #3
On 2018-04-07 20:50, Deborah McLemore wrote:
> We are getting -1 back, what is the expected exit status from systemd
> stop ?
> 

From the execution of the test, what I understand is that we are
requesting start/stop of the service too quickly, which made the test fail.

Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request 
repeated too quickly.
Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with 
result 'start-limit-hit'.
Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.

So we need to request start/restart only once the stop has completed, and
also request stop only when the daemon is already started.


Thanks
Pridhiviraj

> Sent from my iPhone
> 
>> On Apr 7, 2018, at 9:23 AM, Vasant Hegde
> <hegdevasant@linux.vnet.ibm.com> wrote:
>> 
>>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>>> When the opal-prd.service is running and attempt to stop is
>>> performed, ignore the exit status and continue.
>> 
>> Deb,
>> 
>> Can you please explain why do you want to ignore exit status here?
>> Is there any issues?
>> 
>> -Vasant
>> 
>> 
>> 
>>> 
>>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>>> ---
>>> src/opal/prd_info.c | 20 ++++----------------
>>> 1 file changed, 4 insertions(+), 16 deletions(-)
>>> 
>>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>>> index 4082a18..2db9413 100644
>>> --- a/src/opal/prd_info.c
>>> +++ b/src/opal/prd_info.c
>>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>>> 
>>> static int prd_service_check(fwts_framework *fw, int *restart)
>>> {
>>> - int rc = FWTS_OK, status = 0, stop_status = 0;
>>> + int rc = FWTS_OK, status = 0;
>>> char *command;
>>> char *output = NULL;
>>> 
>>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework
> *fw, int *restart)
>>> goto out;
>>> case 0: /* "running" */
>>> command = "systemctl stop opal-prd.service 2>&1";
>>> - stop_status = fwts_exec2(command, &output);
>>> + fwts_exec2(command, &output);
>>> 
>>> if (output)
>>> free(output);
>>> 
>>> - switch (stop_status) {
>>> - case 0:
>>> - *restart = 1;
>>> - break;
>>> - default:
>>> - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
>>> - "Attempt was made to stop the "
>>> - "opal-prd.service but was not "
>>> - "successful. Try to "
>>> - ""sudo systemctl stop "
>>> - "opal-prd.service" and retry.");
>>> - rc = FWTS_ERROR;
>>> - goto out;
>>> - }
>>> + *restart = 1;
>>> + break;
>>> default:
>>> break;
>>> }
>>> 
>> 
>> 
>> --
>> fwts-devel mailing list
>> fwts-devel@lists.ubuntu.com
>> Modify settings or unsubscribe at:
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
> [1]
>> 
> 
> 
> 
> Links:
> ------
> [1]
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
Deborah McLemore April 7, 2018, 6:41 p.m. UTC | #4
<div class="socmaildefaultfont" dir="ltr" style="font-family:Arial, Helvetica, sans-serif;font-size:10.5pt" ><div dir="ltr" >The case I reproduced was manually running the "fwts prd_info" and all it does is a 'systemd status', then if 'running', 'systemd stop'.&nbsp; The 'systemd stop' fails with -1.</div>
<div dir="ltr" >&nbsp;</div>
<div dir="ltr" >It works ok on some levels of Ubuntu and others not, I will do more investigation to see the root differences, but the proposed enhancement</div>
<div dir="ltr" >is a good one to ignore 'systemd stop' exit status since we did get a successful status of 'running' from the 'systemd status' query.</div>
<div dir="ltr" >&nbsp;</div>
<div dir="ltr" >The 'systemd stop' functionally works (the service is stopped), its just the exit status from the 'systemd stop' which is the -1 on some OS's.&nbsp; We should be</div>
<div dir="ltr" >more resilient.&nbsp; We only attempt to 'systemd start' after the test runs if we had determined that we were 'running' and tried the 'systemd stop', so its not so quick, but possibly.</div>
<div dir="ltr" >&nbsp;</div>
<div dir="ltr" >=====================================<br>Deb McLemore<br>IBM OpenPower - IBM Systems<br>(512) 286 9980<br><br>debmc@us.ibm.com<br>debmc@linux.vnet.ibm.com - (plain text)<br>=====================================</div>
<div dir="ltr" >&nbsp;</div>
<div dir="ltr" >&nbsp;</div>
<blockquote data-history-content-modified="1" dir="ltr" style="border-left:solid #aaaaaa 2px; margin-left:5px; padding-left:5px; direction:ltr; margin-right:0px" >----- Original message -----<br>From: ppaidipe &lt;ppaidipe@linux.vnet.ibm.com&gt;<br>To: Deborah McLemore/Austin/IBM@IBMUS<br>Cc: Vasant Hegde &lt;hegdevasant@linux.vnet.ibm.com&gt;, Deb McLemore &lt;debmc@linux.vnet.ibm.com&gt;, fwts-devel@lists.ubuntu.com<br>Subject: Re: [PATCH] opal: prd_info: Add resilience to service check<br>Date: Sat, Apr 7, 2018 1:16 PM<br>&nbsp;
<div><font size="2" face="Default Monospace,Courier New,Courier,monospace" >On 2018-04-07 20:50, Deborah McLemore wrote:<br>&gt; We are getting -1 back, what is the expected exit status from systemd<br>&gt; stop ?<br>&gt;<br><br>&nbsp;From the execution of test what i understand is we are requesting<br>start/stop<br>the service too quickly which made the test fail.<br><br>Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request<br>repeated too quickly.<br>Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with<br>result 'start-limit-hit'.<br>Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.<br><br>So we need to request start/restart only when it is done with stop, and<br>also request for stop<br>only when the daemon is already started.<br><br><br>Thanks<br>Pridhiviraj<br><br>&gt; Sent from my iPhone<br>&gt;<br>&gt;&gt; On Apr 7, 2018, at 9:23 AM, Vasant Hegde<br>&gt; &lt;hegdevasant@linux.vnet.ibm.com&gt; wrote:<br>&gt;&gt;<br>&gt;&gt;&gt; On 04/07/2018 07:40 PM, Deb McLemore wrote:<br>&gt;&gt;&gt; When the opal-prd.service is running and attempt to stop is<br>&gt;&gt;&gt; performed, ignore the exit status and continue.<br>&gt;&gt;<br>&gt;&gt; Deb,<br>&gt;&gt;<br>&gt;&gt; Can you please explain why do you want to ignore exit status here?<br>&gt;&gt; Is there any issues?<br>&gt;&gt;<br>&gt;&gt; -Vasant<br>&gt;&gt;<br>&gt;&gt;<br>&gt;&gt;<br>&gt;&gt;&gt;<br>&gt;&gt;&gt; Signed-off-by: Deb McLemore &lt;debmc@linux.vnet.ibm.com&gt;<br>&gt;&gt;&gt; ---<br>&gt;&gt;&gt; src/opal/prd_info.c | 20 ++++----------------<br>&gt;&gt;&gt; 1 file changed, 4 insertions(+), 16 deletions(-)<br>&gt;&gt;&gt;<br>&gt;&gt;&gt; diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c<br>&gt;&gt;&gt; index 4082a18..2db9413 100644<br>&gt;&gt;&gt; --- a/src/opal/prd_info.c<br>&gt;&gt;&gt; +++ b/src/opal/prd_info.c<br>&gt;&gt;&gt; @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)<br>&gt;&gt;&gt;<br>&gt;&gt;&gt; static int 
prd_service_check(fwts_framework *fw, int *restart)<br>&gt;&gt;&gt; {<br>&gt;&gt;&gt; - int rc = FWTS_OK, status = 0, stop_status = 0;<br>&gt;&gt;&gt; + int rc = FWTS_OK, status = 0;<br>&gt;&gt;&gt; char *command;<br>&gt;&gt;&gt; char *output = NULL;<br>&gt;&gt;&gt;<br>&gt;&gt;&gt; @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework<br>&gt; *fw, int *restart)<br>&gt;&gt;&gt; goto out;<br>&gt;&gt;&gt; case 0: /* "running" */<br>&gt;&gt;&gt; command = "systemctl stop opal-prd.service 2&gt;&amp;1";<br>&gt;&gt;&gt; - stop_status = fwts_exec2(command, &amp;output);<br>&gt;&gt;&gt; + fwts_exec2(command, &amp;output);<br>&gt;&gt;&gt;<br>&gt;&gt;&gt; if (output)<br>&gt;&gt;&gt; free(output);<br>&gt;&gt;&gt;<br>&gt;&gt;&gt; - switch (stop_status) {<br>&gt;&gt;&gt; - case 0:<br>&gt;&gt;&gt; - *restart = 1;<br>&gt;&gt;&gt; - break;<br>&gt;&gt;&gt; - default:<br>&gt;&gt;&gt; - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",<br>&gt;&gt;&gt; - "Attempt was made to stop the "<br>&gt;&gt;&gt; - "opal-prd.service but was not "<br>&gt;&gt;&gt; - "successful. 
Try to "<br>&gt;&gt;&gt; - ""sudo systemctl stop "<br>&gt;&gt;&gt; - "opal-prd.service" and retry.");<br>&gt;&gt;&gt; - rc = FWTS_ERROR;<br>&gt;&gt;&gt; - goto out;<br>&gt;&gt;&gt; - }<br>&gt;&gt;&gt; + *restart = 1;<br>&gt;&gt;&gt; + break;<br>&gt;&gt;&gt; default:<br>&gt;&gt;&gt; break;<br>&gt;&gt;&gt; }<br>&gt;&gt;&gt;<br>&gt;&gt;<br>&gt;&gt;<br>&gt;&gt; --<br>&gt;&gt; fwts-devel mailing list<br>&gt;&gt; fwts-devel@lists.ubuntu.com<br>&gt;&gt; Modify settings or unsubscribe at:<br>&gt; <a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&amp;d=DwIGaQ&amp;c=jf_iaSHvJObTbx-siA1ZOg&amp;r=V3KRDPsp3yMosW9R4elWYg&amp;m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&amp;s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&amp;e=" target="_blank" >https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&amp;d=DwIGaQ&amp;c=jf_iaSHvJObTbx-siA1ZOg&amp;r=V3KRDPsp3yMosW9R4elWYg&amp;m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&amp;s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&amp;e=</a><br>&gt; [1]<br>&gt;&gt;<br>&gt;<br>&gt;<br>&gt;<br>&gt; Links:<br>&gt; ------<br>&gt; [1]<br>&gt; <a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&amp;d=DwIGaQ&amp;c=jf_iaSHvJObTbx-siA1ZOg&amp;r=V3KRDPsp3yMosW9R4elWYg&amp;m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&amp;s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&amp;e=" target="_blank" >https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&amp;d=DwIGaQ&amp;c=jf_iaSHvJObTbx-siA1ZOg&amp;r=V3KRDPsp3yMosW9R4elWYg&amp;m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&amp;s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&amp;e=</a></font></div></blockquote>
<div dir="ltr" >&nbsp;</div></div><BR>
Deb McLemore April 9, 2018, 1:07 p.m. UTC | #5
Just an update on this: I am narrowing this down to the Host OS
(Ubuntu 16.04) having different levels of the opal-prd daemon. So far it
seems that some changes to fwts_pipe_readwrite mean it does not return
some socket info that it used to, and so maybe different paths are taken.

There is a fix we can do to properly look only at the return code from
the child exit process (fwts_pipe_close2), and not at the output buffer of
the socket handling, in the case where there is no socket data coming back
on the systemctl stop command. We really need to look deeper to see the
underlying issue more clearly, but I wanted to update the mailing list.


$ opal-prd --version
opal-prd opal-prd-5.1.13


$ opal-prd --version
opal-prd opal-prd-5.4.3


On 04/07/2018 01:41 PM, Deborah McLemore wrote:
> The case I reproduced was manually running the "fwts prd_info" and all it does 
> is a 'systemd status', then if 'running', 'systemd stop'.  The 'systemd stop' 
> fails with -1.
> It works ok on some levels of Ubuntu and others not, I will do more 
> investigation to see the root differences, but the proposed enhancement
> is a good one to ignore 'systemd stop' exit status since we did get a successful 
> status of 'running' from the 'systemd status' query.
> The 'systemd stop' functionally works (the service is stopped), its just the 
> exit status from the 'systemd stop' which is the -1 on some OS's.  We should be
> more resilient.  We only attempt to 'systemd start' after the test runs if we 
> had determined that we were 'running' and tried the 'systemd stop', so its not 
> so quick, but possibly.
> =====================================
> Deb McLemore
> IBM OpenPower - IBM Systems
> (512) 286 9980
>
> debmc@us.ibm.com
> debmc@linux.vnet.ibm.com - (plain text)
> =====================================
>
>     ----- Original message -----
>     From: ppaidipe <ppaidipe@linux.vnet.ibm.com>
>     To: Deborah McLemore/Austin/IBM@IBMUS
>     Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>, Deb McLemore
>     <debmc@linux.vnet.ibm.com>, fwts-devel@lists.ubuntu.com
>     Subject: Re: [PATCH] opal: prd_info: Add resilience to service check
>     Date: Sat, Apr 7, 2018 1:16 PM
>     On 2018-04-07 20:50, Deborah McLemore wrote:
>      > We are getting -1 back, what is the expected exit status from systemd
>      > stop ?
>      >
>
>       From the execution of test what i understand is we are requesting
>     start/stop
>     the service too quickly which made the test fail.
>
>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request
>     repeated too quickly.
>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with
>     result 'start-limit-hit'.
>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.
>
>     So we need to request start/restart only when it is done with stop, and
>     also request for stop
>     only when the daemon is already started.
>
>
>     Thanks
>     Pridhiviraj
>
>      > Sent from my iPhone
>      >
>      >> On Apr 7, 2018, at 9:23 AM, Vasant Hegde
>      > <hegdevasant@linux.vnet.ibm.com> wrote:
>      >>
>      >>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>      >>> When the opal-prd.service is running and attempt to stop is
>      >>> performed, ignore the exit status and continue.
>      >>
>      >> Deb,
>      >>
>      >> Can you please explain why do you want to ignore exit status here?
>      >> Is there any issues?
>      >>
>      >> -Vasant
>      >>
>      >>
>      >>
>      >>>
>      >>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>      >>> ---
>      >>> src/opal/prd_info.c | 20 ++++----------------
>      >>> 1 file changed, 4 insertions(+), 16 deletions(-)
>      >>>
>      >>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>      >>> index 4082a18..2db9413 100644
>      >>> --- a/src/opal/prd_info.c
>      >>> +++ b/src/opal/prd_info.c
>      >>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>      >>>
>      >>> static int prd_service_check(fwts_framework *fw, int *restart)
>      >>> {
>      >>> - int rc = FWTS_OK, status = 0, stop_status = 0;
>      >>> + int rc = FWTS_OK, status = 0;
>      >>> char *command;
>      >>> char *output = NULL;
>      >>>
>      >>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework
>      > *fw, int *restart)
>      >>> goto out;
>      >>> case 0: /* "running" */
>      >>> command = "systemctl stop opal-prd.service 2>&1";
>      >>> - stop_status = fwts_exec2(command, &output);
>      >>> + fwts_exec2(command, &output);
>      >>>
>      >>> if (output)
>      >>> free(output);
>      >>>
>      >>> - switch (stop_status) {
>      >>> - case 0:
>      >>> - *restart = 1;
>      >>> - break;
>      >>> - default:
>      >>> - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
>      >>> - "Attempt was made to stop the "
>      >>> - "opal-prd.service but was not "
>      >>> - "successful. Try to "
>      >>> - ""sudo systemctl stop "
>      >>> - "opal-prd.service" and retry.");
>      >>> - rc = FWTS_ERROR;
>      >>> - goto out;
>      >>> - }
>      >>> + *restart = 1;
>      >>> + break;
>      >>> default:
>      >>> break;
>      >>> }
>      >>>
>      >>
>      >>
>      >> --
>      >> fwts-devel mailing list
>      >> fwts-devel@lists.ubuntu.com
>      >> Modify settings or unsubscribe at:
>      >
>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>      > [1]
>      >>
>      >
>      >
>      >
>      > Links:
>      > ------
>      > [1]
>      >
>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>
>
Deb McLemore April 9, 2018, 1:28 p.m. UTC | #6
Output from each system:


~/fwts$ opal-prd --version
opal-prd opal-prd-5.1.13

~/fwts$ sudo systemctl stop opal-prd.service
Warning: Stopping opal-prd.service, but it can still be activated by:
  opal-prd.socket


~/fwts$ opal-prd --version
opal-prd opal-prd-5.4.3

~/fwts$ sudo systemctl stop opal-prd.service

On 04/09/2018 08:07 AM, Deb McLemore wrote:

> Just an update on this, narrowing this down to the Host OS (Ubuntu 16.04)
>
> has different levels of opal-prd daemon.  So far it seems that some
>
> changes to the fwts_pipe_readwrite does not return some socket info that it use to
>
> and so maybe different paths.  There is a fix we can do to properly
>
> only look at the return code from the child exit process (fwts_pipe_close2) on the case
>
> where there is no socket data coming back on the systemctl stop command and not the
>
> output buffer of the socket handling, but really need to look deeper to
>
> see the underlying issue more clearly, but I wanted to update the mailing
>
> list.
>
>
> $ opal-prd --version
> opal-prd opal-prd-5.1.13
>
>
> $ opal-prd --version
> opal-prd opal-prd-5.4.3
>
>
> On 04/07/2018 01:41 PM, Deborah McLemore wrote:
>> The case I reproduced was manually running the "fwts prd_info" and all it does 
>> is a 'systemd status', then if 'running', 'systemd stop'.  The 'systemd stop' 
>> fails with -1.
>> It works ok on some levels of Ubuntu and others not, I will do more 
>> investigation to see the root differences, but the proposed enhancement
>> is a good one to ignore 'systemd stop' exit status since we did get a successful 
>> status of 'running' from the 'systemd status' query.
>> The 'systemd stop' functionally works (the service is stopped), its just the 
>> exit status from the 'systemd stop' which is the -1 on some OS's.  We should be
>> more resilient.  We only attempt to 'systemd start' after the test runs if we 
>> had determined that we were 'running' and tried the 'systemd stop', so its not 
>> so quick, but possibly.
>> =====================================
>> Deb McLemore
>> IBM OpenPower - IBM Systems
>> (512) 286 9980
>>
>> debmc@us.ibm.com
>> debmc@linux.vnet.ibm.com - (plain text)
>> =====================================
>>
>>     ----- Original message -----
>>     From: ppaidipe <ppaidipe@linux.vnet.ibm.com>
>>     To: Deborah McLemore/Austin/IBM@IBMUS
>>     Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>, Deb McLemore
>>     <debmc@linux.vnet.ibm.com>, fwts-devel@lists.ubuntu.com
>>     Subject: Re: [PATCH] opal: prd_info: Add resilience to service check
>>     Date: Sat, Apr 7, 2018 1:16 PM
>>     On 2018-04-07 20:50, Deborah McLemore wrote:
>>      > We are getting -1 back, what is the expected exit status from systemd
>>      > stop ?
>>      >
>>
>>       From the execution of test what i understand is we are requesting
>>     start/stop
>>     the service too quickly which made the test fail.
>>
>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request
>>     repeated too quickly.
>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with
>>     result 'start-limit-hit'.
>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.
>>
>>     So we need to request start/restart only when it is done with stop, and
>>     also request for stop
>>     only when the daemon is already started.
>>
>>
>>     Thanks
>>     Pridhiviraj
>>
>>      > Sent from my iPhone
>>      >
>>      >> On Apr 7, 2018, at 9:23 AM, Vasant Hegde
>>      > <hegdevasant@linux.vnet.ibm.com> wrote:
>>      >>
>>      >>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>>      >>> When the opal-prd.service is running and attempt to stop is
>>      >>> performed, ignore the exit status and continue.
>>      >>
>>      >> Deb,
>>      >>
>>      >> Can you please explain why do you want to ignore exit status here?
>>      >> Is there any issues?
>>      >>
>>      >> -Vasant
>>      >>
>>      >>
>>      >>
>>      >>>
>>      >>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>>      >>> ---
>>      >>> src/opal/prd_info.c | 20 ++++----------------
>>      >>> 1 file changed, 4 insertions(+), 16 deletions(-)
>>      >>>
>>      >>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>>      >>> index 4082a18..2db9413 100644
>>      >>> --- a/src/opal/prd_info.c
>>      >>> +++ b/src/opal/prd_info.c
>>      >>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>>      >>>
>>      >>> static int prd_service_check(fwts_framework *fw, int *restart)
>>      >>> {
>>      >>> - int rc = FWTS_OK, status = 0, stop_status = 0;
>>      >>> + int rc = FWTS_OK, status = 0;
>>      >>> char *command;
>>      >>> char *output = NULL;
>>      >>>
>>      >>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework
>>      > *fw, int *restart)
>>      >>> goto out;
>>      >>> case 0: /* "running" */
>>      >>> command = "systemctl stop opal-prd.service 2>&1";
>>      >>> - stop_status = fwts_exec2(command, &output);
>>      >>> + fwts_exec2(command, &output);
>>      >>>
>>      >>> if (output)
>>      >>> free(output);
>>      >>>
>>      >>> - switch (stop_status) {
>>      >>> - case 0:
>>      >>> - *restart = 1;
>>      >>> - break;
>>      >>> - default:
>>      >>> - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
>>      >>> - "Attempt was made to stop the "
>>      >>> - "opal-prd.service but was not "
>>      >>> - "successful. Try to "
>>      >>> - ""sudo systemctl stop "
>>      >>> - "opal-prd.service" and retry.");
>>      >>> - rc = FWTS_ERROR;
>>      >>> - goto out;
>>      >>> - }
>>      >>> + *restart = 1;
>>      >>> + break;
>>      >>> default:
>>      >>> break;
>>      >>> }
>>      >>>
>>      >>
>>      >>
>>      >> --
>>      >> fwts-devel mailing list
>>      >> fwts-devel@lists.ubuntu.com
>>      >> Modify settings or unsubscribe at:
>>      >
>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>      > [1]
>>      >>
>>      >
>>      >
>>      >
>>      > Links:
>>      > ------
>>      > [1]
>>      >
>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>
>>
Alex Hung May 2, 2018, 7:38 p.m. UTC | #7
On Mon, Apr 9, 2018 at 6:07 AM, Deb McLemore <debmc@linux.vnet.ibm.com> wrote:
> Just an update on this, narrowing this down to the Host OS (Ubuntu 16.04)
>
> has different levels of opal-prd daemon.  So far it seems that some
>
> changes to the fwts_pipe_readwrite does not return some socket info that it used to
>
> and so maybe different paths.  There is a fix we can do to properly
>
> only look at the return code from the child exit process (fwts_pipe_close2) on the case
>
> where there is no socket data coming back on the systemctl stop command and not the
>
> output buffer of the socket handling, but really need to look deeper to
>
> see the underlying issue more clearly, but I wanted to update the mailing
>
> list.

Hi Deb,

Are we expecting an updated patch for this, or do you think this patch
is in good shape?

There was no FWTS 18.04.00 but there will be 18.05.00 in two weeks
(hopefully). If everybody agrees, this should be included in 18.05.00.

>
>
> $ opal-prd --version
> opal-prd opal-prd-5.1.13
>
>
> $ opal-prd --version
> opal-prd opal-prd-5.4.3
>
>
> On 04/07/2018 01:41 PM, Deborah McLemore wrote:
>> The case I reproduced was manually running the "fwts prd_info" and all it does
>> is a 'systemd status', then if 'running', 'systemd stop'.  The 'systemd stop'
>> fails with -1.
>> It works ok on some levels of Ubuntu and others not, I will do more
>> investigation to see the root differences, but the proposed enhancement
>> is a good one to ignore 'systemd stop' exit status since we did get a successful
>> status of 'running' from the 'systemd status' query.
>> The 'systemd stop' functionally works (the service is stopped), its just the
>> exit status from the 'systemd stop' which is the -1 on some OS's.  We should be
>> more resilient.  We only attempt to 'systemd start' after the test runs if we
>> had determined that we were 'running' and tried the 'systemd stop', so its not
>> so quick, but possibly.
>> =====================================
>> Deb McLemore
>> IBM OpenPower - IBM Systems
>> (512) 286 9980
>>
>> debmc@us.ibm.com
>> debmc@linux.vnet.ibm.com - (plain text)
>> =====================================
>>
>>     ----- Original message -----
>>     From: ppaidipe <ppaidipe@linux.vnet.ibm.com>
>>     To: Deborah McLemore/Austin/IBM@IBMUS
>>     Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>, Deb McLemore
>>     <debmc@linux.vnet.ibm.com>, fwts-devel@lists.ubuntu.com
>>     Subject: Re: [PATCH] opal: prd_info: Add resilience to service check
>>     Date: Sat, Apr 7, 2018 1:16 PM
>>     On 2018-04-07 20:50, Deborah McLemore wrote:
>>      > We are getting -1 back, what is the expected exit status from systemd
>>      > stop ?
>>      >
>>
>>       From the execution of test what i understand is we are requesting
>>     start/stop
>>     the service too quickly which made the test fail.
>>
>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request
>>     repeated too quickly.
>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with
>>     result 'start-limit-hit'.
>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.
>>
>>     So we need to request start/restart only when it is done with stop, and
>>     also request for stop
>>     only when the daemon is already started.
>>
>>
>>     Thanks
>>     Pridhiviraj
>>
>>      > Sent from my iPhone
>>      >
>>      >> On Apr 7, 2018, at 9:23 AM, Vasant Hegde
>>      > <hegdevasant@linux.vnet.ibm.com> wrote:
>>      >>
>>      >>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>>      >>> When the opal-prd.service is running and attempt to stop is
>>      >>> performed, ignore the exit status and continue.
>>      >>
>>      >> Deb,
>>      >>
>>      >> Can you please explain why do you want to ignore exit status here?
>>      >> Is there any issues?
>>      >>
>>      >> -Vasant
>>      >>
>>      >>
>>      >>
>>      >>>
>>      >>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>>      >>> ---
>>      >>> src/opal/prd_info.c | 20 ++++----------------
>>      >>> 1 file changed, 4 insertions(+), 16 deletions(-)
>>      >>>
>>      >>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>>      >>> index 4082a18..2db9413 100644
>>      >>> --- a/src/opal/prd_info.c
>>      >>> +++ b/src/opal/prd_info.c
>>      >>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>>      >>>
>>      >>> static int prd_service_check(fwts_framework *fw, int *restart)
>>      >>> {
>>      >>> - int rc = FWTS_OK, status = 0, stop_status = 0;
>>      >>> + int rc = FWTS_OK, status = 0;
>>      >>> char *command;
>>      >>> char *output = NULL;
>>      >>>
>>      >>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework
>>      > *fw, int *restart)
>>      >>> goto out;
>>      >>> case 0: /* "running" */
>>      >>> command = "systemctl stop opal-prd.service 2>&1";
>>      >>> - stop_status = fwts_exec2(command, &output);
>>      >>> + fwts_exec2(command, &output);
>>      >>>
>>      >>> if (output)
>>      >>> free(output);
>>      >>>
>>      >>> - switch (stop_status) {
>>      >>> - case 0:
>>      >>> - *restart = 1;
>>      >>> - break;
>>      >>> - default:
>>      >>> - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
>>      >>> - "Attempt was made to stop the "
>>      >>> - "opal-prd.service but was not "
>>      >>> - "successful. Try to "
>>      >>> - "\"sudo systemctl stop "
>>      >>> - "opal-prd.service\" and retry.");
>>      >>> - rc = FWTS_ERROR;
>>      >>> - goto out;
>>      >>> - }
>>      >>> + *restart = 1;
>>      >>> + break;
>>      >>> default:
>>      >>> break;
>>      >>> }
>>      >>>
>>      >>
>>      >>
>>      >> --
>>      >> fwts-devel mailing list
>>      >> fwts-devel@lists.ubuntu.com
>>      >> Modify settings or unsubscribe at:
>>      >
>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>      > [1]
>>      >>
>>      >
>>      >
>>      >
>>      > Links:
>>      > ------
>>      > [1]
>>      >
>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>
>>
>
>
> --
> fwts-devel mailing list
> fwts-devel@lists.ubuntu.com
> Modify settings or unsubscribe at: https://lists.ubuntu.com/mailman/listinfo/fwts-devel
Deb McLemore May 2, 2018, 9:28 p.m. UTC | #8
Hi Alex, the patch is good. There was the fwts_pipeio regression patch
that fixed the issue which surfaced this, but I think the resilience
is good anyway.

https://lists.ubuntu.com/archives/fwts-devel/2018-April/010348.html


On 05/02/2018 02:38 PM, Alex Hung wrote:
> On Mon, Apr 9, 2018 at 6:07 AM, Deb McLemore <debmc@linux.vnet.ibm.com> wrote:
>> Just an update on this, narrowing this down to the Host OS (Ubuntu 16.04)
>>
>> has different levels of opal-prd daemon.  So far it seems that some
>>
>> changes to the fwts_pipe_readwrite does not return some socket info that it used to
>>
>> and so maybe different paths.  There is a fix we can do to properly
>>
>> only look at the return code from the child exit process (fwts_pipe_close2) on the case
>>
>> where there is no socket data coming back on the systemctl stop command and not the
>>
>> output buffer of the socket handling, but really need to look deeper to
>>
>> see the underlying issue more clearly, but I wanted to update the mailing
>>
>> list.
> Hi Deb,
>
> Are we expecting an updated patch for this or do you think this patch
> is in a good shape?
>
> There was no FWTS 18.04.00 but there will be 18.05.00 in two weeks
> (hopefully). If everybody agrees, this should be included in 18.05.00.
>
>>
>> $ opal-prd --version
>> opal-prd opal-prd-5.1.13
>>
>>
>> $ opal-prd --version
>> opal-prd opal-prd-5.4.3
>>
>>
>> On 04/07/2018 01:41 PM, Deborah McLemore wrote:
>>> The case I reproduced was manually running the "fwts prd_info" and all it does
>>> is a 'systemd status', then if 'running', 'systemd stop'.  The 'systemd stop'
>>> fails with -1.
>>> It works ok on some levels of Ubuntu and others not, I will do more
>>> investigation to see the root differences, but the proposed enhancement
>>> is a good one to ignore 'systemd stop' exit status since we did get a successful
>>> status of 'running' from the 'systemd status' query.
>>> The 'systemd stop' functionally works (the service is stopped), its just the
>>> exit status from the 'systemd stop' which is the -1 on some OS's.  We should be
>>> more resilient.  We only attempt to 'systemd start' after the test runs if we
>>> had determined that we were 'running' and tried the 'systemd stop', so its not
>>> so quick, but possibly.
>>> =====================================
>>> Deb McLemore
>>> IBM OpenPower - IBM Systems
>>> (512) 286 9980
>>>
>>> debmc@us.ibm.com
>>> debmc@linux.vnet.ibm.com - (plain text)
>>> =====================================
>>>
>>>     ----- Original message -----
>>>     From: ppaidipe <ppaidipe@linux.vnet.ibm.com>
>>>     To: Deborah McLemore/Austin/IBM@IBMUS
>>>     Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>, Deb McLemore
>>>     <debmc@linux.vnet.ibm.com>, fwts-devel@lists.ubuntu.com
>>>     Subject: Re: [PATCH] opal: prd_info: Add resilience to service check
>>>     Date: Sat, Apr 7, 2018 1:16 PM
>>>     On 2018-04-07 20:50, Deborah McLemore wrote:
>>>      > We are getting -1 back, what is the expected exit status from systemd
>>>      > stop ?
>>>      >
>>>
>>>       From the execution of test what i understand is we are requesting
>>>     start/stop
>>>     the service too quickly which made the test fail.
>>>
>>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request
>>>     repeated too quickly.
>>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with
>>>     result 'start-limit-hit'.
>>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.
>>>
>>>     So we need to request start/restart only when it is done with stop, and
>>>     also request for stop
>>>     only when the daemon is already started.
>>>
>>>
>>>     Thanks
>>>     Pridhiviraj
>>>
>>>      > Sent from my iPhone
>>>      >
>>>      >> On Apr 7, 2018, at 9:23 AM, Vasant Hegde
>>>      > <hegdevasant@linux.vnet.ibm.com> wrote:
>>>      >>
>>>      >>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>>>      >>> When the opal-prd.service is running and attempt to stop is
>>>      >>> performed, ignore the exit status and continue.
>>>      >>
>>>      >> Deb,
>>>      >>
>>>      >> Can you please explain why do you want to ignore exit status here?
>>>      >> Is there any issues?
>>>      >>
>>>      >> -Vasant
>>>      >>
>>>      >>
>>>      >>
>>>      >>>
>>>      >>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>>>      >>> ---
>>>      >>> src/opal/prd_info.c | 20 ++++----------------
>>>      >>> 1 file changed, 4 insertions(+), 16 deletions(-)
>>>      >>>
>>>      >>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>>>      >>> index 4082a18..2db9413 100644
>>>      >>> --- a/src/opal/prd_info.c
>>>      >>> +++ b/src/opal/prd_info.c
>>>      >>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>>>      >>>
>>>      >>> static int prd_service_check(fwts_framework *fw, int *restart)
>>>      >>> {
>>>      >>> - int rc = FWTS_OK, status = 0, stop_status = 0;
>>>      >>> + int rc = FWTS_OK, status = 0;
>>>      >>> char *command;
>>>      >>> char *output = NULL;
>>>      >>>
>>>      >>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework
>>>      > *fw, int *restart)
>>>      >>> goto out;
>>>      >>> case 0: /* "running" */
>>>      >>> command = "systemctl stop opal-prd.service 2>&1";
>>>      >>> - stop_status = fwts_exec2(command, &output);
>>>      >>> + fwts_exec2(command, &output);
>>>      >>>
>>>      >>> if (output)
>>>      >>> free(output);
>>>      >>>
>>>      >>> - switch (stop_status) {
>>>      >>> - case 0:
>>>      >>> - *restart = 1;
>>>      >>> - break;
>>>      >>> - default:
>>>      >>> - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
>>>      >>> - "Attempt was made to stop the "
>>>      >>> - "opal-prd.service but was not "
>>>      >>> - "successful. Try to "
>>>      >>> - "\"sudo systemctl stop "
>>>      >>> - "opal-prd.service\" and retry.");
>>>      >>> - rc = FWTS_ERROR;
>>>      >>> - goto out;
>>>      >>> - }
>>>      >>> + *restart = 1;
>>>      >>> + break;
>>>      >>> default:
>>>      >>> break;
>>>      >>> }
>>>      >>>
>>>      >>
>>>      >>
>>>      >> --
>>>      >> fwts-devel mailing list
>>>      >> fwts-devel@lists.ubuntu.com
>>>      >> Modify settings or unsubscribe at:
>>>      >
>>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>>      > [1]
>>>      >>
>>>      >
>>>      >
>>>      >
>>>      > Links:
>>>      > ------
>>>      > [1]
>>>      >
>>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>>
>>>
>>
>> --
>> fwts-devel mailing list
>> fwts-devel@lists.ubuntu.com
>> Modify settings or unsubscribe at: https://lists.ubuntu.com/mailman/listinfo/fwts-devel
>
>
Alex Hung May 2, 2018, 9:34 p.m. UTC | #9
On Wed, May 2, 2018 at 2:28 PM, Deb McLemore <debmc@linux.vnet.ibm.com> wrote:
> Hi Alex, the patch is good, there was the fwts_pipeio regression patch
>
> which fixed the issue which surfaced this, but I think the resilience
>
> is good anyway.
>
> https://lists.ubuntu.com/archives/fwts-devel/2018-April/010348.html

Thanks Deb. I will ask other reviewers to check/ack it.

>
>
> On 05/02/2018 02:38 PM, Alex Hung wrote:
>> On Mon, Apr 9, 2018 at 6:07 AM, Deb McLemore <debmc@linux.vnet.ibm.com> wrote:
>>> Just an update on this, narrowing this down to the Host OS (Ubuntu 16.04)
>>>
>>> has different levels of opal-prd daemon.  So far it seems that some
>>>
>>> changes to the fwts_pipe_readwrite does not return some socket info that it used to
>>>
>>> and so maybe different paths.  There is a fix we can do to properly
>>>
>>> only look at the return code from the child exit process (fwts_pipe_close2) on the case
>>>
>>> where there is no socket data coming back on the systemctl stop command and not the
>>>
>>> output buffer of the socket handling, but really need to look deeper to
>>>
>>> see the underlying issue more clearly, but I wanted to update the mailing
>>>
>>> list.
>> Hi Deb,
>>
>> Are we expecting an updated patch for this or do you think this patch
>> is in a good shape?
>>
>> There was no FWTS 18.04.00 but there will be 18.05.00 in two weeks
>> (hopefully). If everybody agrees, this should be included in 18.05.00.
>>
>>>
>>> $ opal-prd --version
>>> opal-prd opal-prd-5.1.13
>>>
>>>
>>> $ opal-prd --version
>>> opal-prd opal-prd-5.4.3
>>>
>>>
>>> On 04/07/2018 01:41 PM, Deborah McLemore wrote:
>>>> The case I reproduced was manually running the "fwts prd_info" and all it does
>>>> is a 'systemd status', then if 'running', 'systemd stop'.  The 'systemd stop'
>>>> fails with -1.
>>>> It works ok on some levels of Ubuntu and others not, I will do more
>>>> investigation to see the root differences, but the proposed enhancement
>>>> is a good one to ignore 'systemd stop' exit status since we did get a successful
>>>> status of 'running' from the 'systemd status' query.
>>>> The 'systemd stop' functionally works (the service is stopped), its just the
>>>> exit status from the 'systemd stop' which is the -1 on some OS's.  We should be
>>>> more resilient.  We only attempt to 'systemd start' after the test runs if we
>>>> had determined that we were 'running' and tried the 'systemd stop', so its not
>>>> so quick, but possibly.
>>>> =====================================
>>>> Deb McLemore
>>>> IBM OpenPower - IBM Systems
>>>> (512) 286 9980
>>>>
>>>> debmc@us.ibm.com
>>>> debmc@linux.vnet.ibm.com - (plain text)
>>>> =====================================
>>>>
>>>>     ----- Original message -----
>>>>     From: ppaidipe <ppaidipe@linux.vnet.ibm.com>
>>>>     To: Deborah McLemore/Austin/IBM@IBMUS
>>>>     Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>, Deb McLemore
>>>>     <debmc@linux.vnet.ibm.com>, fwts-devel@lists.ubuntu.com
>>>>     Subject: Re: [PATCH] opal: prd_info: Add resilience to service check
>>>>     Date: Sat, Apr 7, 2018 1:16 PM
>>>>     On 2018-04-07 20:50, Deborah McLemore wrote:
>>>>      > We are getting -1 back, what is the expected exit status from systemd
>>>>      > stop ?
>>>>      >
>>>>
>>>>       From the execution of test what i understand is we are requesting
>>>>     start/stop
>>>>     the service too quickly which made the test fail.
>>>>
>>>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Start request
>>>>     repeated too quickly.
>>>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: opal-prd.service: Failed with
>>>>     result 'start-limit-hit'.
>>>>     Apr 07 13:11:18 xxxxxxxxxxx systemd[1]: Failed to start OPAL PRD daemon.
>>>>
>>>>     So we need to request start/restart only when it is done with stop, and
>>>>     also request for stop
>>>>     only when the daemon is already started.
>>>>
>>>>
>>>>     Thanks
>>>>     Pridhiviraj
>>>>
>>>>      > Sent from my iPhone
>>>>      >
>>>>      >> On Apr 7, 2018, at 9:23 AM, Vasant Hegde
>>>>      > <hegdevasant@linux.vnet.ibm.com> wrote:
>>>>      >>
>>>>      >>> On 04/07/2018 07:40 PM, Deb McLemore wrote:
>>>>      >>> When the opal-prd.service is running and attempt to stop is
>>>>      >>> performed, ignore the exit status and continue.
>>>>      >>
>>>>      >> Deb,
>>>>      >>
>>>>      >> Can you please explain why do you want to ignore exit status here?
>>>>      >> Is there any issues?
>>>>      >>
>>>>      >> -Vasant
>>>>      >>
>>>>      >>
>>>>      >>
>>>>      >>>
>>>>      >>> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
>>>>      >>> ---
>>>>      >>> src/opal/prd_info.c | 20 ++++----------------
>>>>      >>> 1 file changed, 4 insertions(+), 16 deletions(-)
>>>>      >>>
>>>>      >>> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
>>>>      >>> index 4082a18..2db9413 100644
>>>>      >>> --- a/src/opal/prd_info.c
>>>>      >>> +++ b/src/opal/prd_info.c
>>>>      >>> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>>>>      >>>
>>>>      >>> static int prd_service_check(fwts_framework *fw, int *restart)
>>>>      >>> {
>>>>      >>> - int rc = FWTS_OK, status = 0, stop_status = 0;
>>>>      >>> + int rc = FWTS_OK, status = 0;
>>>>      >>> char *command;
>>>>      >>> char *output = NULL;
>>>>      >>>
>>>>      >>> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework
>>>>      > *fw, int *restart)
>>>>      >>> goto out;
>>>>      >>> case 0: /* "running" */
>>>>      >>> command = "systemctl stop opal-prd.service 2>&1";
>>>>      >>> - stop_status = fwts_exec2(command, &output);
>>>>      >>> + fwts_exec2(command, &output);
>>>>      >>>
>>>>      >>> if (output)
>>>>      >>> free(output);
>>>>      >>>
>>>>      >>> - switch (stop_status) {
>>>>      >>> - case 0:
>>>>      >>> - *restart = 1;
>>>>      >>> - break;
>>>>      >>> - default:
>>>>      >>> - fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
>>>>      >>> - "Attempt was made to stop the "
>>>>      >>> - "opal-prd.service but was not "
>>>>      >>> - "successful. Try to "
>>>>      >>> - "\"sudo systemctl stop "
>>>>      >>> - "opal-prd.service\" and retry.");
>>>>      >>> - rc = FWTS_ERROR;
>>>>      >>> - goto out;
>>>>      >>> - }
>>>>      >>> + *restart = 1;
>>>>      >>> + break;
>>>>      >>> default:
>>>>      >>> break;
>>>>      >>> }
>>>>      >>>
>>>>      >>
>>>>      >>
>>>>      >> --
>>>>      >> fwts-devel mailing list
>>>>      >> fwts-devel@lists.ubuntu.com
>>>>      >> Modify settings or unsubscribe at:
>>>>      >
>>>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>>>      > [1]
>>>>      >>
>>>>      >
>>>>      >
>>>>      >
>>>>      > Links:
>>>>      > ------
>>>>      > [1]
>>>>      >
>>>>     https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.ubuntu.com_mailman_listinfo_fwts-2Ddevel&d=DwIGaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=V3KRDPsp3yMosW9R4elWYg&m=Sy-O20yWd_N3piZoJOEzigB1XzmLV4OUCfEyl3ENAcc&s=oPh1ACx1NGTgif-0V5BIQffXXqjymI8QC_bagI2jZsA&e=
>>>>
>>>>
>>>
>>> --
>>> fwts-devel mailing list
>>> fwts-devel@lists.ubuntu.com
>>> Modify settings or unsubscribe at: https://lists.ubuntu.com/mailman/listinfo/fwts-devel
>>
>>
>
Alex Hung May 2, 2018, 11:37 p.m. UTC | #10
On 2018-04-07 07:10 AM, Deb McLemore wrote:
> When the opal-prd.service is running and attempt to stop is
> performed, ignore the exit status and continue.
> 
> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
> ---
>   src/opal/prd_info.c | 20 ++++----------------
>   1 file changed, 4 insertions(+), 16 deletions(-)
> 
> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
> index 4082a18..2db9413 100644
> --- a/src/opal/prd_info.c
> +++ b/src/opal/prd_info.c
> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>   
>   static int prd_service_check(fwts_framework *fw, int *restart)
>   {
> -	int rc = FWTS_OK, status = 0, stop_status = 0;
> +	int rc = FWTS_OK, status = 0;
>   	char *command;
>   	char *output = NULL;
>   
> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework *fw, int *restart)
>   		goto out;
>   	case 0: /* "running" */
>   		command = "systemctl stop opal-prd.service 2>&1";
> -		stop_status = fwts_exec2(command, &output);
> +		fwts_exec2(command, &output);
>   
>   		if (output)
>   			free(output);
>   
> -		switch (stop_status) {
> -		case 0:
> -                        *restart = 1;
> -                        break;
> -		default:
> -                        fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
> -                                "Attempt was made to stop the "
> -                                "opal-prd.service but was not "
> -                                "successful. Try to "
> -                                "\"sudo systemctl stop "
> -                                "opal-prd.service\" and retry.");
> -                        rc = FWTS_ERROR;
> -                        goto out;
> -		}
> +		*restart = 1;
> +		break;
>   	default:
>   		break;
>   	}
> 



Acked-by: Alex Hung <alex.hung@canonical.com>
Ivan Hu May 9, 2018, 6:55 a.m. UTC | #11
On 04/07/2018 10:10 PM, Deb McLemore wrote:
> When the opal-prd.service is running and attempt to stop is
> performed, ignore the exit status and continue.
> 
> Signed-off-by: Deb McLemore <debmc@linux.vnet.ibm.com>
> ---
>   src/opal/prd_info.c | 20 ++++----------------
>   1 file changed, 4 insertions(+), 16 deletions(-)
> 
> diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
> index 4082a18..2db9413 100644
> --- a/src/opal/prd_info.c
> +++ b/src/opal/prd_info.c
> @@ -73,7 +73,7 @@ static int prd_dev_query(fwts_framework *fw)
>   
>   static int prd_service_check(fwts_framework *fw, int *restart)
>   {
> -	int rc = FWTS_OK, status = 0, stop_status = 0;
> +	int rc = FWTS_OK, status = 0;
>   	char *command;
>   	char *output = NULL;
>   
> @@ -97,25 +97,13 @@ static int prd_service_check(fwts_framework *fw, int *restart)
>   		goto out;
>   	case 0: /* "running" */
>   		command = "systemctl stop opal-prd.service 2>&1";
> -		stop_status = fwts_exec2(command, &output);
> +		fwts_exec2(command, &output);
>   
>   		if (output)
>   			free(output);
>   
> -		switch (stop_status) {
> -		case 0:
> -                        *restart = 1;
> -                        break;
> -		default:
> -                        fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
> -                                "Attempt was made to stop the "
> -                                "opal-prd.service but was not "
> -                                "successful. Try to "
> -                                "\"sudo systemctl stop "
> -                                "opal-prd.service\" and retry.");
> -                        rc = FWTS_ERROR;
> -                        goto out;
> -		}
> +		*restart = 1;
> +		break;
>   	default:
>   		break;
>   	}
> 

Acked-by: Ivan Hu <ivan.hu@canonical.com>
diff mbox series

Patch

diff --git a/src/opal/prd_info.c b/src/opal/prd_info.c
index 4082a18..2db9413 100644
--- a/src/opal/prd_info.c
+++ b/src/opal/prd_info.c
@@ -73,7 +73,7 @@  static int prd_dev_query(fwts_framework *fw)
 
 static int prd_service_check(fwts_framework *fw, int *restart)
 {
-	int rc = FWTS_OK, status = 0, stop_status = 0;
+	int rc = FWTS_OK, status = 0;
 	char *command;
 	char *output = NULL;
 
@@ -97,25 +97,13 @@  static int prd_service_check(fwts_framework *fw, int *restart)
 		goto out;
 	case 0: /* "running" */
 		command = "systemctl stop opal-prd.service 2>&1";
-		stop_status = fwts_exec2(command, &output);
+		fwts_exec2(command, &output);
 
 		if (output)
 			free(output);
 
-		switch (stop_status) {
-		case 0:
-                        *restart = 1;
-                        break;
-		default:
-                        fwts_failed(fw, LOG_LEVEL_HIGH, "OPAL PRD Info",
-                                "Attempt was made to stop the "
-                                "opal-prd.service but was not "
-                                "successful. Try to "
-                                "\"sudo systemctl stop "
-                                "opal-prd.service\" and retry.");
-                        rc = FWTS_ERROR;
-                        goto out;
-		}
+		*restart = 1;
+		break;
 	default:
 		break;
 	}