diff mbox series

[ovs-dev,v2] ovs-tcpdump: Fix cleanup mirror failed with twice fatal signals.

Message ID 20240223033727.71989-1-danieldin186@gmail.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series [ovs-dev,v2] ovs-tcpdump: Fix cleanup mirror failed with twice fatal signals. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Daniel Ding Feb. 23, 2024, 3:37 a.m. UTC
After running ovs-tcpdump and inputs multiple CTRL+C, the program will
raise the following exception.

Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/usr/bin/ovs-tcpdump", line 421, in cleanup_mirror
    ovsdb = OVSDB(db_sock)
  File "/usr/bin/ovs-tcpdump", line 168, in __init__
    OVSDB.wait_for_db_change(self._idl_conn)  # Initial Sync with DB
  File "/usr/bin/ovs-tcpdump", line 155, in wait_for_db_change
    while idl.change_seqno == seq and not idl.run():

The default handler of SIGINT is default_int_handler, so it not
register the signal handler successfully. When received CTRL+C again,
the program be break, and calling hook can't execute completely.

Signed-off-by: Daniel Ding <danieldin186@gmail.com>
---
 python/ovs/fatal_signal.py | 24 +++++++++++++-----------
 utilities/ovs-tcpdump.in   | 38 +++++++++++++++++---------------------
 2 files changed, 30 insertions(+), 32 deletions(-)

Comments

Ilya Maximets March 16, 2024, 1:17 a.m. UTC | #1
On 2/23/24 04:37, Daniel Ding wrote:
> After running ovs-tcpdump and inputs multiple CTRL+C, the program will
> raise the following exception.
> 
> Error in atexit._run_exitfuncs:
> Traceback (most recent call last):
>   File "/usr/bin/ovs-tcpdump", line 421, in cleanup_mirror
>     ovsdb = OVSDB(db_sock)
>   File "/usr/bin/ovs-tcpdump", line 168, in __init__
>     OVSDB.wait_for_db_change(self._idl_conn)  # Initial Sync with DB
>   File "/usr/bin/ovs-tcpdump", line 155, in wait_for_db_change
>     while idl.change_seqno == seq and not idl.run():
> 
> The default handler of SIGINT is default_int_handler, so it not
> register the signal handler successfully. When received CTRL+C again,
> the program be break, and calling hook can't execute completely.
> 
> Signed-off-by: Daniel Ding <danieldin186@gmail.com>
> ---
>  python/ovs/fatal_signal.py | 24 +++++++++++++-----------
>  utilities/ovs-tcpdump.in   | 38 +++++++++++++++++---------------------
>  2 files changed, 30 insertions(+), 32 deletions(-)

Sorry for the late response.  Thanks for v2!

I did some testing and it seem to fix the problem, though in case the ovsdb
connection is no longer available for some reason, we would block forever in
the handler waiting for the server to reply while the signals are blocked.
So, it's getting harder to kill the ovs-tcpdump in this situation.  But I
don't really have a solution for this.  We will have either this problem or
lingering ports and mirrors as long as we're trying to communicate with the
external process in the signal handler.

I think, it's an OK trade-off to make.  Double Ctrl+C seems like a more
frequent event than paused or stuck ovsdb-server, so I think it's OK to have
this patch.

See a few minor comments below.

Best regards, Ilya Maximets.

> 
> diff --git a/python/ovs/fatal_signal.py b/python/ovs/fatal_signal.py
> index cb2e99e87..077f50dd5 100644
> --- a/python/ovs/fatal_signal.py
> +++ b/python/ovs/fatal_signal.py
> @@ -16,6 +16,7 @@ import atexit
>  import os
>  import signal
>  import sys
> +import threading
>  
>  import ovs.vlog
>  
> @@ -112,29 +113,29 @@ def _unlink(file_):
>  def _signal_handler(signr, _):
>      _call_hooks(signr)
>  
> -    # Re-raise the signal with the default handling so that the program
> -    # termination status reflects that we were killed by this signal.
> -    signal.signal(signr, signal.SIG_DFL)
> -    os.kill(os.getpid(), signr)
> -
>  
>  def _atexit_handler():
>      _call_hooks(0)
>  
>  
> -recurse = False
> +mutex = threading.Lock()
>  
>  
>  def _call_hooks(signr):
> -    global recurse
> -    if recurse:
> +    global mutex
> +    if not mutex.acquire(blocking=False):
>          return
> -    recurse = True
>  
>      for hook, cancel, run_at_exit in _hooks:
>          if signr != 0 or run_at_exit:
>              hook()
>  
> +    if signr != 0:
> +       # Re-raise the signal with the default handling so that the program
> +       # termination status reflects that we were killed by this signal.
> +       signal.signal(signr, signal.SIG_DFL)
> +       os.kill(os.getpid(), signr)

The indentation here is incorrect:

python/ovs/fatal_signal.py:134:8: E114 indentation is not a multiple of 4 (comment)
python/ovs/fatal_signal.py:135:8: E114 indentation is not a multiple of 4 (comment)
python/ovs/fatal_signal.py:136:8: E111 indentation is not a multiple of 4
python/ovs/fatal_signal.py:137:8: E111 indentation is not a multiple of 4

> +
>  
>  _inited = False
>  
> @@ -150,7 +151,9 @@ def _init():
>                         signal.SIGALRM]
>  
>          for signr in signals:
> -            if signal.getsignal(signr) == signal.SIG_DFL:
> +            handler = signal.getsignal(signr)
> +            if (handler == signal.SIG_DFL or
> +                handler == signal.default_int_handler):
>                  signal.signal(signr, _signal_handler)
>          atexit.register(_atexit_handler)
>  
> @@ -165,7 +168,6 @@ def signal_alarm(timeout):
>  
>      if sys.platform == "win32":
>          import time
> -        import threading
>  
>          class Alarm (threading.Thread):
>              def __init__(self, timeout):
> diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
> index 4cbd9a5d3..0731b4ac8 100755
> --- a/utilities/ovs-tcpdump.in
> +++ b/utilities/ovs-tcpdump.in
> @@ -534,29 +534,25 @@ def main():
>      ovsdb.close_idl()
>  
>      pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
> +    while pipes.poll() is None:
> +        data = pipes.stdout.readline().strip(b'\n')
> +        if len(data) == 0:
> +            break
> +        print(data.decode('utf-8'))
> +
> +    # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
> +    # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
> +    # after received Ctrl+C.
> +    # If we write data to an unavailable pipe, a pipe error will be
> +    # reported, so we turn off stdout to avoid subsequent flushing
> +    # of data into the pipe.

I'm not sure if we need this comment.
The code below will not be executed after Ctrl+C.  We will go
directly to the signal handler, execute hooks and raise the
signal that will terminate the program.

The only way we can end up here is if underlying tcpdump exited
for some reason and we're in the process of graceful termination.

I think, we should keep the code below, but it seems that the
comment lost its meaning.

Does that make sense?


>      try:
> -        while pipes.poll() is None:
> -            data = pipes.stdout.readline().strip(b'\n')
> -            if len(data) == 0:
> -                raise KeyboardInterrupt
> -            print(data.decode('utf-8'))
> -        raise KeyboardInterrupt
> -    except KeyboardInterrupt:
> -        # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
> -        # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
> -        # after received Ctrl+C.
> -        # If we write data to an unavailable pipe, a pipe error will be
> -        # reported, so we turn off stdout to avoid subsequent flushing
> -        # of data into the pipe.
> -        try:
> -            sys.stdout.close()
> -        except IOError:
> -            pass
> -
> -        if pipes.poll() is None:
> -            pipes.terminate()
> +       sys.stdout.close()
> +    except IOError:
> +       pass

And here the indentation is not right:

utilities/ovs-tcpdump.in:550:8: E111 indentation is not a multiple of 4
utilities/ovs-tcpdump.in:552:8: E111 indentation is not a multiple of 4

>  
> -    sys.exit(0)
> +    if pipes.poll() is None:
> +        pipes.terminate()
>  
>  
>  if __name__ == '__main__':
Daniel Ding March 18, 2024, 8:38 a.m. UTC | #2
> 2024年3月16日 上午9:17,Ilya Maximets <i.maximets@ovn.org> 写道:
> 
> On 2/23/24 04:37, Daniel Ding wrote:
>> After running ovs-tcpdump and inputs multiple CTRL+C, the program will
>> raise the following exception.
>> 
>> Error in atexit._run_exitfuncs:
>> Traceback (most recent call last):
>>  File "/usr/bin/ovs-tcpdump", line 421, in cleanup_mirror
>>    ovsdb = OVSDB(db_sock)
>>  File "/usr/bin/ovs-tcpdump", line 168, in __init__
>>    OVSDB.wait_for_db_change(self._idl_conn)  # Initial Sync with DB
>>  File "/usr/bin/ovs-tcpdump", line 155, in wait_for_db_change
>>    while idl.change_seqno == seq and not idl.run():
>> 
>> The default handler of SIGINT is default_int_handler, so it not
>> register the signal handler successfully. When received CTRL+C again,
>> the program be break, and calling hook can't execute completely.
>> 
>> Signed-off-by: Daniel Ding <danieldin186@gmail.com>
>> ---
>> python/ovs/fatal_signal.py | 24 +++++++++++++-----------
>> utilities/ovs-tcpdump.in   | 38 +++++++++++++++++---------------------
>> 2 files changed, 30 insertions(+), 32 deletions(-)
> 
> Sorry for the late response.  Thanks for v2!
> 
> I did some testing and it seem to fix the problem, though in case the ovsdb
> connection is no longer available for some reason, we would block forever in
> the handler waiting for the server to reply while the signals are blocked.
> So, it's getting harder to kill the ovs-tcpdump in this situation.  But I
> don't really have a solution for this.  We will have either this problem or
> lingering ports and mirrors as long as we're trying to communicate with the
> external process in the signal handler.
> 
> I think, it's an OK trade-off to make.  Double Ctrl+C seems like a more
> frequent event than paused or stuck ovsdb-server, so I think it's OK to have
> this patch.
> 
> See a few minor comments below.
> 

Please help me review the following patch again, Thanks!

> Best regards, Ilya Maximets.
> 
>> 
>> diff --git a/python/ovs/fatal_signal.py b/python/ovs/fatal_signal.py
>> index cb2e99e87..077f50dd5 100644
>> --- a/python/ovs/fatal_signal.py
>> +++ b/python/ovs/fatal_signal.py
>> @@ -16,6 +16,7 @@ import atexit
>> import os
>> import signal
>> import sys
>> +import threading
>> 
>> import ovs.vlog
>> 
>> @@ -112,29 +113,29 @@ def _unlink(file_):
>> def _signal_handler(signr, _):
>>     _call_hooks(signr)
>> 
>> -    # Re-raise the signal with the default handling so that the program
>> -    # termination status reflects that we were killed by this signal.
>> -    signal.signal(signr, signal.SIG_DFL)
>> -    os.kill(os.getpid(), signr)
>> -
>> 
>> def _atexit_handler():
>>     _call_hooks(0)
>> 
>> 
>> -recurse = False
>> +mutex = threading.Lock()
>> 
>> 
>> def _call_hooks(signr):
>> -    global recurse
>> -    if recurse:
>> +    global mutex
>> +    if not mutex.acquire(blocking=False):
>>         return
>> -    recurse = True
>> 
>>     for hook, cancel, run_at_exit in _hooks:
>>         if signr != 0 or run_at_exit:
>>             hook()
>> 
>> +    if signr != 0:
>> +       # Re-raise the signal with the default handling so that the program
>> +       # termination status reflects that we were killed by this signal.
>> +       signal.signal(signr, signal.SIG_DFL)
>> +       os.kill(os.getpid(), signr)
> 
> The indentation here is incorrect:
> 
> python/ovs/fatal_signal.py:134:8: E114 indentation is not a multiple of 4 (comment)
> python/ovs/fatal_signal.py:135:8: E114 indentation is not a multiple of 4 (comment)
> python/ovs/fatal_signal.py:136:8: E111 indentation is not a multiple of 4
> python/ovs/fatal_signal.py:137:8: E111 indentation is not a multiple of 4

Done

> 
>> +
>> 
>> _inited = False
>> 
>> @@ -150,7 +151,9 @@ def _init():
>>                        signal.SIGALRM]
>> 
>>         for signr in signals:
>> -            if signal.getsignal(signr) == signal.SIG_DFL:
>> +            handler = signal.getsignal(signr)
>> +            if (handler == signal.SIG_DFL or
>> +                handler == signal.default_int_handler):
>>                 signal.signal(signr, _signal_handler)
>>         atexit.register(_atexit_handler)
>> 
>> @@ -165,7 +168,6 @@ def signal_alarm(timeout):
>> 
>>     if sys.platform == "win32":
>>         import time
>> -        import threading
>> 
>>         class Alarm (threading.Thread):
>>             def __init__(self, timeout):
>> diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
>> index 4cbd9a5d3..0731b4ac8 100755
>> --- a/utilities/ovs-tcpdump.in
>> +++ b/utilities/ovs-tcpdump.in
>> @@ -534,29 +534,25 @@ def main():
>>     ovsdb.close_idl()
>> 
>>     pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
>> +    while pipes.poll() is None:
>> +        data = pipes.stdout.readline().strip(b'\n')
>> +        if len(data) == 0:
>> +            break
>> +        print(data.decode('utf-8'))
>> +
>> +    # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
>> +    # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
>> +    # after received Ctrl+C.
>> +    # If we write data to an unavailable pipe, a pipe error will be
>> +    # reported, so we turn off stdout to avoid subsequent flushing
>> +    # of data into the pipe.
> 
> I'm not sure if we need this comment.
> The code below will not be executed after Ctrl+C.  We will go
> directly to the signal handler, execute hooks and raise the
> signal that will terminate the program.
> 
> The only way we can end up here is if underlying tcpdump exited
> for some reason and we're in the process of graceful termination.
> 
> I think, we should keep the code below, but it seems that the
> comment lost its meaning.
> 
> Does that make sense?
> 

OK, I removed the comment and keep the code bellow.

> 
>>     try:
>> -        while pipes.poll() is None:
>> -            data = pipes.stdout.readline().strip(b'\n')
>> -            if len(data) == 0:
>> -                raise KeyboardInterrupt
>> -            print(data.decode('utf-8'))
>> -        raise KeyboardInterrupt
>> -    except KeyboardInterrupt:
>> -        # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
>> -        # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
>> -        # after received Ctrl+C.
>> -        # If we write data to an unavailable pipe, a pipe error will be
>> -        # reported, so we turn off stdout to avoid subsequent flushing
>> -        # of data into the pipe.
>> -        try:
>> -            sys.stdout.close()
>> -        except IOError:
>> -            pass
>> -
>> -        if pipes.poll() is None:
>> -            pipes.terminate()
>> +       sys.stdout.close()
>> +    except IOError:
>> +       pass
> 
> And here the indentation is not right:
> 
> utilities/ovs-tcpdump.in:550 <http://ovs-tcpdump.in:550/>:8: E111 indentation is not a multiple of 4
> utilities/ovs-tcpdump.in:552 <http://ovs-tcpdump.in:552/>:8: E111 indentation is not a multiple of 4
> 
>> 
>> -    sys.exit(0)
>> +    if pipes.poll() is None:
>> +        pipes.terminate()
>> 
>> 
>> if __name__ == '__main__’:


After running ovs-tcpdump and inputs multiple CTRL+C, the program will
raise the following exception.

Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/usr/bin/ovs-tcpdump", line 421, in cleanup_mirror
    ovsdb = OVSDB(db_sock)
  File "/usr/bin/ovs-tcpdump", line 168, in __init__
    OVSDB.wait_for_db_change(self._idl_conn)  # Initial Sync with DB
  File "/usr/bin/ovs-tcpdump", line 155, in wait_for_db_change
    while idl.change_seqno == seq and not idl.run():

The default handler of SIGINT is default_int_handler, so it was not
registered to the signal handler. When received CTRL+C again, the program
was broken, and calling hook could not be executed completely.

Signed-off-by: Daniel Ding <danieldin186@gmail.com>
---
 python/ovs/fatal_signal.py | 24 +++++++++++++-----------
 utilities/ovs-tcpdump.in   | 32 +++++++++++---------------------
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/python/ovs/fatal_signal.py b/python/ovs/fatal_signal.py
index cb2e99e87..16a7e78a0 100644
--- a/python/ovs/fatal_signal.py
+++ b/python/ovs/fatal_signal.py
@@ -16,6 +16,7 @@ import atexit
 import os
 import signal
 import sys
+import threading

 import ovs.vlog

@@ -112,29 +113,29 @@ def _unlink(file_):
 def _signal_handler(signr, _):
     _call_hooks(signr)

-    # Re-raise the signal with the default handling so that the program
-    # termination status reflects that we were killed by this signal.
-    signal.signal(signr, signal.SIG_DFL)
-    os.kill(os.getpid(), signr)
-

 def _atexit_handler():
     _call_hooks(0)


-recurse = False
+mutex = threading.Lock()


 def _call_hooks(signr):
-    global recurse
-    if recurse:
+    global mutex
+    if not mutex.acquire(blocking=False):
         return
-    recurse = True

     for hook, cancel, run_at_exit in _hooks:
         if signr != 0 or run_at_exit:
             hook()

+    if signr != 0:
+        # Re-raise the signal with the default handling so that the program
+        # termination status reflects that we were killed by this signal.
+        signal.signal(signr, signal.SIG_DFL)
+        os.kill(os.getpid(), signr)
+

 _inited = False

@@ -150,7 +151,9 @@ def _init():
                        signal.SIGALRM]

         for signr in signals:
-            if signal.getsignal(signr) == signal.SIG_DFL:
+            handler = signal.getsignal(signr)
+            if (handler == signal.SIG_DFL or
+                handler == signal.default_int_handler):
                 signal.signal(signr, _signal_handler)
         atexit.register(_atexit_handler)

@@ -165,7 +168,6 @@ def signal_alarm(timeout):

     if sys.platform == "win32":
         import time
-        import threading

         class Alarm (threading.Thread):
             def __init__(self, timeout):
diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
index 4cbd9a5d3..eada803bb 100755
--- a/utilities/ovs-tcpdump.in
+++ b/utilities/ovs-tcpdump.in
@@ -534,29 +534,19 @@ def main():
     ovsdb.close_idl()

     pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
-    try:
-        while pipes.poll() is None:
-            data = pipes.stdout.readline().strip(b'\n')
-            if len(data) == 0:
-                raise KeyboardInterrupt
-            print(data.decode('utf-8'))
-        raise KeyboardInterrupt
-    except KeyboardInterrupt:
-        # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
-        # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
-        # after received Ctrl+C.
-        # If we write data to an unavailable pipe, a pipe error will be
-        # reported, so we turn off stdout to avoid subsequent flushing
-        # of data into the pipe.
-        try:
-            sys.stdout.close()
-        except IOError:
-            pass
+    while pipes.poll() is None:
+        data = pipes.stdout.readline().strip(b'\n')
+        if len(data) == 0:
+            break
+        print(data.decode('utf-8'))

-        if pipes.poll() is None:
-            pipes.terminate()
+    try:
+        sys.stdout.close()
+    except IOError:
+        pass

-    sys.exit(0)
+    if pipes.poll() is None:
+        pipes.terminate()


 if __name__ == '__main__':
--
2.43.0



Regards,
Daniel Ding
Ilya Maximets March 19, 2024, 9:05 p.m. UTC | #3
On 3/18/24 09:38, Daniel Ding wrote:
> 
> 
>> 2024年3月16日 上午9:17,Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> 写道:
>>
>> On 2/23/24 04:37, Daniel Ding wrote:
>>> After running ovs-tcpdump and inputs multiple CTRL+C, the program will
>>> raise the following exception.
>>>
>>> Error in atexit._run_exitfuncs:
>>> Traceback (most recent call last):
>>>  File "/usr/bin/ovs-tcpdump", line 421, in cleanup_mirror
>>>    ovsdb = OVSDB(db_sock)
>>>  File "/usr/bin/ovs-tcpdump", line 168, in __init__
>>>    OVSDB.wait_for_db_change(self._idl_conn)  # Initial Sync with DB
>>>  File "/usr/bin/ovs-tcpdump", line 155, in wait_for_db_change
>>>    while idl.change_seqno == seq and not idl.run():
>>>
>>> The default handler of SIGINT is default_int_handler, so it not
>>> register the signal handler successfully. When received CTRL+C again,
>>> the program be break, and calling hook can't execute completely.
>>>
>>> Signed-off-by: Daniel Ding <danieldin186@gmail.com <mailto:danieldin186@gmail.com>>
>>> ---
>>> python/ovs/fatal_signal.py | 24 +++++++++++++-----------
>>> utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>   | 38 +++++++++++++++++---------------------
>>> 2 files changed, 30 insertions(+), 32 deletions(-)
>>
>> Sorry for the late response.  Thanks for v2!
>>
>> I did some testing and it seem to fix the problem, though in case the ovsdb
>> connection is no longer available for some reason, we would block forever in
>> the handler waiting for the server to reply while the signals are blocked.
>> So, it's getting harder to kill the ovs-tcpdump in this situation.  But I
>> don't really have a solution for this.  We will have either this problem or
>> lingering ports and mirrors as long as we're trying to communicate with the
>> external process in the signal handler.
>>
>> I think, it's an OK trade-off to make.  Double Ctrl+C seems like a more
>> frequent event than paused or stuck ovsdb-server, so I think it's OK to have
>> this patch.
>>
>> See a few minor comments below.
>>
> 
> Please help me review the following patch again, Thanks!
> 
>> Best regards, Ilya Maximets.
>>
>>>
>>> diff --git a/python/ovs/fatal_signal.py b/python/ovs/fatal_signal.py
>>> index cb2e99e87..077f50dd5 100644
>>> --- a/python/ovs/fatal_signal.py
>>> +++ b/python/ovs/fatal_signal.py
>>> @@ -16,6 +16,7 @@ import atexit
>>> import os
>>> import signal
>>> import sys
>>> +import threading
>>>
>>> import ovs.vlog
>>>
>>> @@ -112,29 +113,29 @@ def _unlink(file_):
>>> def _signal_handler(signr, _):
>>>     _call_hooks(signr)
>>>
>>> -    # Re-raise the signal with the default handling so that the program
>>> -    # termination status reflects that we were killed by this signal.
>>> -    signal.signal(signr, signal.SIG_DFL)
>>> -    os.kill(os.getpid(), signr)
>>> -
>>>
>>> def _atexit_handler():
>>>     _call_hooks(0)
>>>
>>>
>>> -recurse = False
>>> +mutex = threading.Lock()
>>>
>>>
>>> def _call_hooks(signr):
>>> -    global recurse
>>> -    if recurse:
>>> +    global mutex
>>> +    if not mutex.acquire(blocking=False):
>>>         return
>>> -    recurse = True
>>>
>>>     for hook, cancel, run_at_exit in _hooks:
>>>         if signr != 0 or run_at_exit:
>>>             hook()
>>>
>>> +    if signr != 0:
>>> +       # Re-raise the signal with the default handling so that the program
>>> +       # termination status reflects that we were killed by this signal.
>>> +       signal.signal(signr, signal.SIG_DFL)
>>> +       os.kill(os.getpid(), signr)
>>
>> The indentation here is incorrect:
>>
>> python/ovs/fatal_signal.py:134:8: E114 indentation is not a multiple of 4 (comment)
>> python/ovs/fatal_signal.py:135:8: E114 indentation is not a multiple of 4 (comment)
>> python/ovs/fatal_signal.py:136:8: E111 indentation is not a multiple of 4
>> python/ovs/fatal_signal.py:137:8: E111 indentation is not a multiple of 4
> 
> Done
> 
>>
>>> +
>>>
>>> _inited = False
>>>
>>> @@ -150,7 +151,9 @@ def _init():
>>>                        signal.SIGALRM]
>>>
>>>         for signr in signals:
>>> -            if signal.getsignal(signr) == signal.SIG_DFL:
>>> +            handler = signal.getsignal(signr)
>>> +            if (handler == signal.SIG_DFL or
>>> +                handler == signal.default_int_handler):
>>>                 signal.signal(signr, _signal_handler)
>>>         atexit.register(_atexit_handler)
>>>
>>> @@ -165,7 +168,6 @@ def signal_alarm(timeout):
>>>
>>>     if sys.platform == "win32":
>>>         import time
>>> -        import threading
>>>
>>>         class Alarm (threading.Thread):
>>>             def __init__(self, timeout):
>>> diff --git a/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in> b/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>
>>> index 4cbd9a5d3..0731b4ac8 100755
>>> --- a/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>
>>> +++ b/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>
>>> @@ -534,29 +534,25 @@ def main():
>>>     ovsdb.close_idl()
>>>
>>>     pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
>>> +    while pipes.poll() is None:
>>> +        data = pipes.stdout.readline().strip(b'\n')
>>> +        if len(data) == 0:
>>> +            break
>>> +        print(data.decode('utf-8'))
>>> +
>>> +    # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
>>> +    # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
>>> +    # after received Ctrl+C.
>>> +    # If we write data to an unavailable pipe, a pipe error will be
>>> +    # reported, so we turn off stdout to avoid subsequent flushing
>>> +    # of data into the pipe.
>>
>> I'm not sure if we need this comment.
>> The code below will not be executed after Ctrl+C.  We will go
>> directly to the signal handler, execute hooks and raise the
>> signal that will terminate the program.
>>
>> The only way we can end up here is if underlying tcpdump exited
>> for some reason and we're in the process of graceful termination.
>>
>> I think, we should keep the code below, but it seems that the
>> comment lost its meaning.
>>
>> Does that make sense?
>>
> 
> OK, I removed the comment and keep the code bellow.
> 
>>
>>>     try:
>>> -        while pipes.poll() is None:
>>> -            data = pipes.stdout.readline().strip(b'\n')
>>> -            if len(data) == 0:
>>> -                raise KeyboardInterrupt
>>> -            print(data.decode('utf-8'))
>>> -        raise KeyboardInterrupt
>>> -    except KeyboardInterrupt:
>>> -        # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
>>> -        # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
>>> -        # after received Ctrl+C.
>>> -        # If we write data to an unavailable pipe, a pipe error will be
>>> -        # reported, so we turn off stdout to avoid subsequent flushing
>>> -        # of data into the pipe.
>>> -        try:
>>> -            sys.stdout.close()
>>> -        except IOError:
>>> -            pass
>>> -
>>> -        if pipes.poll() is None:
>>> -            pipes.terminate()
>>> +       sys.stdout.close()
>>> +    except IOError:
>>> +       pass
>>
>> And here the indentation is not right:
>>
>> utilities/ovs-tcpdump.in:550 <http://ovs-tcpdump.in:550/>:8: E111 indentation is not a multiple of 4
>> utilities/ovs-tcpdump.in:552 <http://ovs-tcpdump.in:552/>:8: E111 indentation is not a multiple of 4
>>
>>>
>>> -    sys.exit(0)
>>> +    if pipes.poll() is None:
>>> +        pipes.terminate()
>>>
>>>
>>> if __name__ == '__main__’:
> 
> 
> After running ovs-tcpdump and inputs multiple CTRL+C, the program will
> raise the following exception.
> 
> Error in atexit._run_exitfuncs:
> Traceback (most recent call last):
>   File "/usr/bin/ovs-tcpdump", line 421, in cleanup_mirror
>     ovsdb = OVSDB(db_sock)
>   File "/usr/bin/ovs-tcpdump", line 168, in __init__
>     OVSDB.wait_for_db_change(self._idl_conn)  # Initial Sync with DB
>   File "/usr/bin/ovs-tcpdump", line 155, in wait_for_db_change
>     while idl.change_seqno == seq and not idl.run():
> 
> The default handler of SIGINT is default_int_handler, so it was not
> registered to the signal handler. When received CTRL+C again, the program
> was broken, and calling hook could not be executed completely.
> 
> Signed-off-by: Daniel Ding <danieldin186@gmail.com <mailto:danieldin186@gmail.com>>
> ---
>  python/ovs/fatal_signal.py | 24 +++++++++++++-----------
>  utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>   | 32 +++++++++++---------------------
>  2 files changed, 24 insertions(+), 32 deletions(-)

Thanks!  This seems correct to me.

Could you send it as a new PATCH v3 ?

Best regards, Ilya Maximets.

> 
> diff --git a/python/ovs/fatal_signal.py b/python/ovs/fatal_signal.py
> index cb2e99e87..16a7e78a0 100644
> --- a/python/ovs/fatal_signal.py
> +++ b/python/ovs/fatal_signal.py
> @@ -16,6 +16,7 @@ import atexit
>  import os
>  import signal
>  import sys
> +import threading
> 
>  import ovs.vlog
> 
> @@ -112,29 +113,29 @@ def _unlink(file_):
>  def _signal_handler(signr, _):
>      _call_hooks(signr)
> 
> -    # Re-raise the signal with the default handling so that the program
> -    # termination status reflects that we were killed by this signal.
> -    signal.signal(signr, signal.SIG_DFL)
> -    os.kill(os.getpid(), signr)
> -
> 
>  def _atexit_handler():
>      _call_hooks(0)
> 
> 
> -recurse = False
> +mutex = threading.Lock()
> 
> 
>  def _call_hooks(signr):
> -    global recurse
> -    if recurse:
> +    global mutex
> +    if not mutex.acquire(blocking=False):
>          return
> -    recurse = True
> 
>      for hook, cancel, run_at_exit in _hooks:
>          if signr != 0 or run_at_exit:
>              hook()
> 
> +    if signr != 0:
> +        # Re-raise the signal with the default handling so that the program
> +        # termination status reflects that we were killed by this signal.
> +        signal.signal(signr, signal.SIG_DFL)
> +        os.kill(os.getpid(), signr)
> +
> 
>  _inited = False
> 
> @@ -150,7 +151,9 @@ def _init():
>                         signal.SIGALRM]
> 
>          for signr in signals:
> -            if signal.getsignal(signr) == signal.SIG_DFL:
> +            handler = signal.getsignal(signr)
> +            if (handler == signal.SIG_DFL or
> +                handler == signal.default_int_handler):
>                  signal.signal(signr, _signal_handler)
>          atexit.register(_atexit_handler)
> 
> @@ -165,7 +168,6 @@ def signal_alarm(timeout):
> 
>      if sys.platform == "win32":
>          import time
> -        import threading
> 
>          class Alarm (threading.Thread):
>              def __init__(self, timeout):
> diff --git a/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in> b/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>
> index 4cbd9a5d3..eada803bb 100755
> --- a/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>
> +++ b/utilities/ovs-tcpdump.in <http://ovs-tcpdump.in>
> @@ -534,29 +534,19 @@ def main():
>      ovsdb.close_idl()
> 
>      pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
> -    try:
> -        while pipes.poll() is None:
> -            data = pipes.stdout.readline().strip(b'\n')
> -            if len(data) == 0:
> -                raise KeyboardInterrupt
> -            print(data.decode('utf-8'))
> -        raise KeyboardInterrupt
> -    except KeyboardInterrupt:
> -        # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
> -        # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
> -        # after received Ctrl+C.
> -        # If we write data to an unavailable pipe, a pipe error will be
> -        # reported, so we turn off stdout to avoid subsequent flushing
> -        # of data into the pipe.
> -        try:
> -            sys.stdout.close()
> -        except IOError:
> -            pass
> +    while pipes.poll() is None:
> +        data = pipes.stdout.readline().strip(b'\n')
> +        if len(data) == 0:
> +            break
> +        print(data.decode('utf-8'))
> 
> -        if pipes.poll() is None:
> -            pipes.terminate()
> +    try:
> +        sys.stdout.close()
> +    except IOError:
> +        pass
> 
> -    sys.exit(0)
> +    if pipes.poll() is None:
> +        pipes.terminate()
> 
> 
>  if __name__ == '__main__':
> --
> 2.43.0
> 
> 
> 
> Regards,
> Daniel Ding
> 
> 
>
diff mbox series

Patch

diff --git a/python/ovs/fatal_signal.py b/python/ovs/fatal_signal.py
index cb2e99e87..077f50dd5 100644
--- a/python/ovs/fatal_signal.py
+++ b/python/ovs/fatal_signal.py
@@ -16,6 +16,7 @@  import atexit
 import os
 import signal
 import sys
+import threading
 
 import ovs.vlog
 
@@ -112,29 +113,29 @@  def _unlink(file_):
 def _signal_handler(signr, _):
     _call_hooks(signr)
 
-    # Re-raise the signal with the default handling so that the program
-    # termination status reflects that we were killed by this signal.
-    signal.signal(signr, signal.SIG_DFL)
-    os.kill(os.getpid(), signr)
-
 
 def _atexit_handler():
     _call_hooks(0)
 
 
-recurse = False
+mutex = threading.Lock()
 
 
 def _call_hooks(signr):
-    global recurse
-    if recurse:
+    global mutex
+    if not mutex.acquire(blocking=False):
         return
-    recurse = True
 
     for hook, cancel, run_at_exit in _hooks:
         if signr != 0 or run_at_exit:
             hook()
 
+    if signr != 0:
+       # Re-raise the signal with the default handling so that the program
+       # termination status reflects that we were killed by this signal.
+       signal.signal(signr, signal.SIG_DFL)
+       os.kill(os.getpid(), signr)
+
 
 _inited = False
 
@@ -150,7 +151,9 @@  def _init():
                        signal.SIGALRM]
 
         for signr in signals:
-            if signal.getsignal(signr) == signal.SIG_DFL:
+            handler = signal.getsignal(signr)
+            if (handler == signal.SIG_DFL or
+                handler == signal.default_int_handler):
                 signal.signal(signr, _signal_handler)
         atexit.register(_atexit_handler)
 
@@ -165,7 +168,6 @@  def signal_alarm(timeout):
 
     if sys.platform == "win32":
         import time
-        import threading
 
         class Alarm (threading.Thread):
             def __init__(self, timeout):
diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
index 4cbd9a5d3..0731b4ac8 100755
--- a/utilities/ovs-tcpdump.in
+++ b/utilities/ovs-tcpdump.in
@@ -534,29 +534,25 @@  def main():
     ovsdb.close_idl()
 
     pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
+    while pipes.poll() is None:
+        data = pipes.stdout.readline().strip(b'\n')
+        if len(data) == 0:
+            break
+        print(data.decode('utf-8'))
+
+    # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
+    # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
+    # after received Ctrl+C.
+    # If we write data to an unavailable pipe, a pipe error will be
+    # reported, so we turn off stdout to avoid subsequent flushing
+    # of data into the pipe.
     try:
-        while pipes.poll() is None:
-            data = pipes.stdout.readline().strip(b'\n')
-            if len(data) == 0:
-                raise KeyboardInterrupt
-            print(data.decode('utf-8'))
-        raise KeyboardInterrupt
-    except KeyboardInterrupt:
-        # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump
-        # -i eth0 | grep "192.168.1.1"), the pipe is no longer available
-        # after received Ctrl+C.
-        # If we write data to an unavailable pipe, a pipe error will be
-        # reported, so we turn off stdout to avoid subsequent flushing
-        # of data into the pipe.
-        try:
-            sys.stdout.close()
-        except IOError:
-            pass
-
-        if pipes.poll() is None:
-            pipes.terminate()
+       sys.stdout.close()
+    except IOError:
+       pass
 
-    sys.exit(0)
+    if pipes.poll() is None:
+        pipes.terminate()
 
 
 if __name__ == '__main__':