diff mbox series

[1/1] check-package: fix Python3 support

Message ID 20180723004107.20985-1-ricardo.martincoski@gmail.com
State Changes Requested
Headers show
Series [1/1] check-package: fix Python3 support | expand

Commit Message

Ricardo Martincoski July 23, 2018, 12:41 a.m. UTC
This script currently uses "/usr/bin/env python" as shebang but it does
not really support Python3. Instead of limiting the script to Python2,
fix it to support both versions.

Do this by first using an automated tool:
$ python -m modernize -w -p <files>
and then manually fixing what remains:
 - keep the imports in alphabetical order;
 - fix handling of files that cannot be decoded by 'utf-8' codec, such
   as package/urg/0002-urg-gcc6-fix-narrowing-conversion.patch .

In order to avoid errors when decoding files, use
errors="surrogateescape" when opening files, the docs for open() states:
"This is useful for processing files in an unknown encoding.".
This argument is not compatible with Python2 open() so import 'six' to
use it only when running in Python3.
As a consequence the file handler becomes explicit, so use it to close()
the file after it got processed.

Signed-off-by: Ricardo Martincoski <ricardo.martincoski@gmail.com>
---
Using the default docker (debian) image which uses Python2:
before this patch:
https://gitlab.com/buildroot.org/buildroot/-/jobs/83454639
after this patch:
https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/83542815

Using an arch docker image which uses Python3:
before this patch:
https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/81892607
after this patch:
https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/83543363
The arch image for docker was generated using:
http://patchwork.ozlabs.org/patch/943301/
---
 utils/check-package                 |  9 ++++++++-
 utils/checkpackagelib/lib.py        |  3 ++-
 utils/checkpackagelib/lib_config.py | 13 +++++++------
 utils/checkpackagelib/lib_hash.py   | 13 +++++++------
 utils/checkpackagelib/lib_mk.py     | 11 ++++++-----
 utils/checkpackagelib/lib_patch.py  |  5 +++--
 6 files changed, 33 insertions(+), 21 deletions(-)

Comments

Arnout Vandecappelle July 24, 2018, 8:54 a.m. UTC | #1
On 23-07-18 02:41, Ricardo Martincoski wrote:
> This script currently uses "/usr/bin/env python" as shebang but it does
> not really support Python3. Instead of limiting the script to Python2,
> fix it to support both versions.
> 
> Do this by first using an automated tool:
> $ python -m modernize -w -p <files>

 In general (if it doesn't break things), I prefer to do the automatic stuff in
a separate patch from the manual stuff. That makes review easier (because for
the automatic stuff, you mainly review the automation itself rather than the
result).

> and then manually fixing what remains:
>  - keep the imports in alphabetical order;
>  - fix handling of files that cannot be decoded by 'utf-8' codec, such
>    as package/urg/0002-urg-gcc6-fix-narrowing-conversion.patch .
> 
> In order to avoid errors when decoding files, use
> errors="surrogateescape" when opening files, the docs for open() states:
> "This is useful for processing files in an unknown encoding.".
> This argument is not compatible with Python2 open() so import 'six' to
> use it only when running in Python3.

 I would prefer to open files with "rb" instead. However, that's a much bigger
change because all checks should then use bytes objects instead of strings.

> As a consequence the file handler becomes explicit, so use it to close()
> the file after it got processed.
> 
> Signed-off-by: Ricardo Martincoski <ricardo.martincoski@gmail.com>
> ---
> Using the default docker (debian) image which uses Python2:
> before this patch:
> https://gitlab.com/buildroot.org/buildroot/-/jobs/83454639
> after this patch:
> https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/83542815
> 
> Using an arch docker image which uses Python3:
> before this patch:
> https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/81892607
> after this patch:
> https://gitlab.com/RicardoMartincoski/buildroot/-/jobs/83543363
> The arch image for docker was generated using:
> http://patchwork.ozlabs.org/patch/943301/
> ---
>  utils/check-package                 |  9 ++++++++-
>  utils/checkpackagelib/lib.py        |  3 ++-
>  utils/checkpackagelib/lib_config.py | 13 +++++++------
>  utils/checkpackagelib/lib_hash.py   | 13 +++++++------
>  utils/checkpackagelib/lib_mk.py     | 11 ++++++-----
>  utils/checkpackagelib/lib_patch.py  |  5 +++--
>  6 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/utils/check-package b/utils/check-package
> index 3dbc28b0a2..a8f5ce6157 100755
> --- a/utils/check-package
> +++ b/utils/check-package
> @@ -1,11 +1,13 @@
>  #!/usr/bin/env python
>  # See utils/checkpackagelib/readme.txt before editing this file.
>  
> +from __future__ import absolute_import

 This doesn't make sense in a top-level script, does it?

>  from __future__ import print_function
>  import argparse
>  import inspect
>  import os
>  import re
> +import six
>  import sys
>  
>  import checkpackagelib.lib_config
> @@ -127,10 +129,15 @@ def check_file_using_lib(fname):
>  
>      for cf in objects:
>          nwarnings += print_warnings(cf.before())
> -    for lineno, text in enumerate(open(fname, "r").readlines()):
> +    if six.PY3:
> +        f = open(fname, "r", errors="surrogateescape")
> +    else:
> +        f = open(fname, "r")
> +    for lineno, text in enumerate(f.readlines()):
>          nlines += 1
>          for cf in objects:
>              nwarnings += print_warnings(cf.check_line(lineno + 1, text))
> +    f.close()
>      for cf in objects:
>          nwarnings += print_warnings(cf.after())
>  
> diff --git a/utils/checkpackagelib/lib.py b/utils/checkpackagelib/lib.py
> index 91f4ad49b7..61fa46125d 100644
> --- a/utils/checkpackagelib/lib.py
> +++ b/utils/checkpackagelib/lib.py
> @@ -1,6 +1,7 @@
>  # See utils/checkpackagelib/readme.txt before editing this file.
>  
> -from base import _CheckFunction
> +from __future__ import absolute_import
> +from .base import _CheckFunction

 Why is this changed? Will Python3 look for base anywhere else than in the
checkpackagelib directory?

 Same below.

>  
>  
>  class ConsecutiveEmptyLines(_CheckFunction):
> diff --git a/utils/checkpackagelib/lib_config.py b/utils/checkpackagelib/lib_config.py
> index 1d273f1c5f..5f316a6d5b 100644
> --- a/utils/checkpackagelib/lib_config.py
> +++ b/utils/checkpackagelib/lib_config.py
> @@ -3,13 +3,14 @@
>  # "bool", so below check functions don't need to check for things already
>  # checked by running "make menuconfig".
>  
> +from __future__ import absolute_import
>  import re
>  
> -from base import _CheckFunction
> -from lib import ConsecutiveEmptyLines  # noqa: F401
> -from lib import EmptyLastLine          # noqa: F401
> -from lib import NewlineAtEof           # noqa: F401
> -from lib import TrailingSpace          # noqa: F401
> +from .base import _CheckFunction
> +from .lib import ConsecutiveEmptyLines  # noqa: F401
> +from .lib import EmptyLastLine          # noqa: F401
> +from .lib import NewlineAtEof           # noqa: F401
> +from .lib import TrailingSpace          # noqa: F401
>  
>  
>  def _empty_or_comment(text):
> @@ -45,7 +46,7 @@ class AttributesOrder(_CheckFunction):
>          if attribute in entries_that_should_not_be_indented:
>              self.state = 0
>              return
> -        if attribute not in self.attributes_order_convention.keys():
> +        if attribute not in list(self.attributes_order_convention.keys()):

 Why is this needed? 'not in' works on an iterator, doesn't it?

 Same below.

 Bottom line: I don't think modernize does anything useful.

 Regards,
 Arnout


>              return
>          new_state = self.attributes_order_convention[attribute]
>          wrong_order = self.state > new_state
> diff --git a/utils/checkpackagelib/lib_hash.py b/utils/checkpackagelib/lib_hash.py
> index 6d4cc9fd62..8ca92a5a90 100644
> --- a/utils/checkpackagelib/lib_hash.py
> +++ b/utils/checkpackagelib/lib_hash.py
> @@ -3,13 +3,14 @@
>  # functions don't need to check for things already checked by running
>  # "make package-dirclean package-source".
>  
> +from __future__ import absolute_import
>  import re
>  
> -from base import _CheckFunction
> -from lib import ConsecutiveEmptyLines  # noqa: F401
> -from lib import EmptyLastLine          # noqa: F401
> -from lib import NewlineAtEof           # noqa: F401
> -from lib import TrailingSpace          # noqa: F401
> +from .base import _CheckFunction
> +from .lib import ConsecutiveEmptyLines  # noqa: F401
> +from .lib import EmptyLastLine          # noqa: F401
> +from .lib import NewlineAtEof           # noqa: F401
> +from .lib import TrailingSpace          # noqa: F401
>  
>  
>  def _empty_line_or_comment(text):
> @@ -43,7 +44,7 @@ class HashType(_CheckFunction):
>          htype, hexa = fields[:2]
>          if htype == "none":
>              return
> -        if htype not in self.len_of_hash.keys():
> +        if htype not in list(self.len_of_hash.keys()):
>              return ["{}:{}: unexpected type of hash ({}#adding-packages-hash)"
>                      .format(self.filename, lineno, self.url_to_manual),
>                      text]
> diff --git a/utils/checkpackagelib/lib_mk.py b/utils/checkpackagelib/lib_mk.py
> index 86e9aa2d97..9c17a3cfd2 100644
> --- a/utils/checkpackagelib/lib_mk.py
> +++ b/utils/checkpackagelib/lib_mk.py
> @@ -4,13 +4,14 @@
>  # menu options using "make menuconfig" and by running "make" with appropriate
>  # packages enabled.
>  
> +from __future__ import absolute_import
>  import re
>  
> -from base import _CheckFunction
> -from lib import ConsecutiveEmptyLines  # noqa: F401
> -from lib import EmptyLastLine          # noqa: F401
> -from lib import NewlineAtEof           # noqa: F401
> -from lib import TrailingSpace          # noqa: F401
> +from .base import _CheckFunction
> +from .lib import ConsecutiveEmptyLines  # noqa: F401
> +from .lib import EmptyLastLine          # noqa: F401
> +from .lib import NewlineAtEof           # noqa: F401
> +from .lib import TrailingSpace          # noqa: F401
>  
>  
>  class Indent(_CheckFunction):
> diff --git a/utils/checkpackagelib/lib_patch.py b/utils/checkpackagelib/lib_patch.py
> index 555621afa1..4b0d32268f 100644
> --- a/utils/checkpackagelib/lib_patch.py
> +++ b/utils/checkpackagelib/lib_patch.py
> @@ -3,10 +3,11 @@
>  # functions don't need to check for things already checked by running
>  # "make package-dirclean package-patch".
>  
> +from __future__ import absolute_import
>  import re
>  
> -from base import _CheckFunction
> -from lib import NewlineAtEof           # noqa: F401
> +from .base import _CheckFunction
> +from .lib import NewlineAtEof           # noqa: F401
>  
>  
>  class ApplyOrder(_CheckFunction):
>
Ricardo Martincoski July 26, 2018, 3:21 a.m. UTC | #2
Hello,

Thank you for your review.
I don't usually write Python3 code, and I still beginning to use modernize.
Sorry I did the newbie mistake of trusting too much in the defaults of the tool.

On Tue, Jul 24, 2018 at 05:54 AM, Arnout Vandecappelle wrote:

> On 23-07-18 02:41, Ricardo Martincoski wrote:
>> This script currently uses "/usr/bin/env python" as shebang but it does
>> not really support Python3. Instead of limiting the script to Python2,
>> fix it to support both versions.
>> 
>> Do this by first using an automated tool:
>> $ python -m modernize -w -p <files>
> 
>  In general (if it doesn't break things), I prefer to do the automatic stuff in
> a separate patch from the manual stuff. That makes review easier (because for
> the automatic stuff, you mainly review the automation itself rather than the
> result).

OK. In this case things are already broken. The automatic stuff would unbreak
99% of them, and the manual stuff would unbreak the remaining 1%. So separate
patches makes sense.
I will not use the tool for the new iteration, but I will keep in mind for
future patch series.

> 
>> and then manually fixing what remains:
>>  - keep the imports in alphabetical order;
>>  - fix handling of files that cannot be decoded by 'utf-8' codec, such
>>    as package/urg/0002-urg-gcc6-fix-narrowing-conversion.patch .
>> 
>> In order to avoid errors when decoding files, use
>> errors="surrogateescape" when opening files, the docs for open() states:
>> "This is useful for processing files in an unknown encoding.".
>> This argument is not compatible with Python2 open() so import 'six' to
>> use it only when running in Python3.
> 
>  I would prefer to open files with "rb" instead. However, that's a much bigger
> change because all checks should then use bytes objects instead of strings.

Indeed it would change all functions in the lib*.py files.
It seems to me it's not worth the effort in this case. All functions are there
to check text files. The only case we can have non-ascii/non-utf-8 files being
checked by the script are for patch files when the file to be patched is not
ascii or utf-8. We have currently one case in the tree right now.

[snip]
>> +++ b/utils/check-package
>> @@ -1,11 +1,13 @@
>>  #!/usr/bin/env python
>>  # See utils/checkpackagelib/readme.txt before editing this file.
>>  
>> +from __future__ import absolute_import
> 
>  This doesn't make sense in a top-level script, does it?

I am not sure in the general sense. I am still trying to understand what it
really does. It's not easy to get a docker with Python 2.5 to test (the official
docker images from python start from 2.7). I will probably fallback to setup a
VM.

But it is certainly not needed here. It was a result from the automatic stuff.
I will remove it. Actually from everywhere.

[snip]
>> +++ b/utils/checkpackagelib/lib.py
>> @@ -1,6 +1,7 @@
>>  # See utils/checkpackagelib/readme.txt before editing this file.
>>  
>> -from base import _CheckFunction
>> +from __future__ import absolute_import
>> +from .base import _CheckFunction
> 
>  Why is this changed? Will Python3 look for base anywhere else than in the
> checkpackagelib directory?

Python3 follows PEP328 and dropped implicit relative imports.

But maybe a better approach is to use the absolute import:
from checkpackagelib.base import _CheckFunction

I will do this, everywhere, if you agree.

> 
>  Same below.
> 
[snip]
>> @@ -45,7 +46,7 @@ class AttributesOrder(_CheckFunction):
>>          if attribute in entries_that_should_not_be_indented:
>>              self.state = 0
>>              return
>> -        if attribute not in self.attributes_order_convention.keys():
>> +        if attribute not in list(self.attributes_order_convention.keys()):
> 
>  Why is this needed? 'not in' works on an iterator, doesn't it?

You are right. It does work. It seems modernize adds it always, when it is
needed (when a index will be used in the result for example) and when it is not
needed.

So I probably won't use the 'hash' transformation from modernize anymore, it is
enabled by default.

> 
>  Same below.
> 
>  Bottom line: I don't think modernize does anything useful.

For this script I must agree.

Good examples (not applicable to this script) seem to be the conversion of
print, exception, and urllib2.


Regards,
Ricardo
Arnout Vandecappelle July 26, 2018, 8:05 a.m. UTC | #3
On 26-07-18 05:21, Ricardo Martincoski wrote:
> Hello,
> 
> Thank you for your review.
> I don't usually write Python3 code, and I still beginning to use modernize.
> Sorry I did the newbie mistake of trusting too much in the defaults of the tool.
> 
> On Tue, Jul 24, 2018 at 05:54 AM, Arnout Vandecappelle wrote:
> 
>> On 23-07-18 02:41, Ricardo Martincoski wrote:
[snip]
>>> +from __future__ import absolute_import
>>
>>  This doesn't make sense in a top-level script, does it?
> 
> I am not sure in the general sense. I am still trying to understand what it
> really does. 

 So am I. As I understand it, in modern Python, for scripts that are part of a
package, the import path is relative to the top level of the package rather than
the directory of the module itself. For scripts outside of a package, nothing
has changed AFAIK.

> It's not easy to get a docker with Python 2.5 to test (the official
> docker images from python start from 2.7).

 Since we're targetting RedHat 6 here, Python 2.6 should be sufficient, no?

> I will probably fallback to setup a VM.
> 
> But it is certainly not needed here. It was a result from the automatic stuff.
> I will remove it. Actually from everywhere.
> 
> [snip]
>>> +++ b/utils/checkpackagelib/lib.py
>>> @@ -1,6 +1,7 @@
>>>  # See utils/checkpackagelib/readme.txt before editing this file.
>>>  
>>> -from base import _CheckFunction
>>> +from __future__ import absolute_import
>>> +from .base import _CheckFunction
>>
>>  Why is this changed? Will Python3 look for base anywhere else than in the
>> checkpackagelib directory?
> 
> Python3 follows PEP328 and dropped implicit relative imports.
> 
> But maybe a better approach is to use the absolute import:
> from checkpackagelib.base import _CheckFunction
> 
> I will do this, everywhere, if you agree.

 Yes, the .xxx approach only makes sense in cases where you want a module in the
package be callable directly as a script.

[snip]
> Good examples (not applicable to this script) seem to be the conversion of
> print, exception, and urllib2.

 print() was already handled before. We're lacking tests of python3 compliance
of this script to make sure there are no regression. Heck, we're lacking tests
of this script, period.

 Regards,
 Arnout
diff mbox series

Patch

diff --git a/utils/check-package b/utils/check-package
index 3dbc28b0a2..a8f5ce6157 100755
--- a/utils/check-package
+++ b/utils/check-package
@@ -1,11 +1,13 @@ 
 #!/usr/bin/env python
 # See utils/checkpackagelib/readme.txt before editing this file.
 
+from __future__ import absolute_import
 from __future__ import print_function
 import argparse
 import inspect
 import os
 import re
+import six
 import sys
 
 import checkpackagelib.lib_config
@@ -127,10 +129,15 @@  def check_file_using_lib(fname):
 
     for cf in objects:
         nwarnings += print_warnings(cf.before())
-    for lineno, text in enumerate(open(fname, "r").readlines()):
+    if six.PY3:
+        f = open(fname, "r", errors="surrogateescape")
+    else:
+        f = open(fname, "r")
+    for lineno, text in enumerate(f.readlines()):
         nlines += 1
         for cf in objects:
             nwarnings += print_warnings(cf.check_line(lineno + 1, text))
+    f.close()
     for cf in objects:
         nwarnings += print_warnings(cf.after())
 
diff --git a/utils/checkpackagelib/lib.py b/utils/checkpackagelib/lib.py
index 91f4ad49b7..61fa46125d 100644
--- a/utils/checkpackagelib/lib.py
+++ b/utils/checkpackagelib/lib.py
@@ -1,6 +1,7 @@ 
 # See utils/checkpackagelib/readme.txt before editing this file.
 
-from base import _CheckFunction
+from __future__ import absolute_import
+from .base import _CheckFunction
 
 
 class ConsecutiveEmptyLines(_CheckFunction):
diff --git a/utils/checkpackagelib/lib_config.py b/utils/checkpackagelib/lib_config.py
index 1d273f1c5f..5f316a6d5b 100644
--- a/utils/checkpackagelib/lib_config.py
+++ b/utils/checkpackagelib/lib_config.py
@@ -3,13 +3,14 @@ 
 # "bool", so below check functions don't need to check for things already
 # checked by running "make menuconfig".
 
+from __future__ import absolute_import
 import re
 
-from base import _CheckFunction
-from lib import ConsecutiveEmptyLines  # noqa: F401
-from lib import EmptyLastLine          # noqa: F401
-from lib import NewlineAtEof           # noqa: F401
-from lib import TrailingSpace          # noqa: F401
+from .base import _CheckFunction
+from .lib import ConsecutiveEmptyLines  # noqa: F401
+from .lib import EmptyLastLine          # noqa: F401
+from .lib import NewlineAtEof           # noqa: F401
+from .lib import TrailingSpace          # noqa: F401
 
 
 def _empty_or_comment(text):
@@ -45,7 +46,7 @@  class AttributesOrder(_CheckFunction):
         if attribute in entries_that_should_not_be_indented:
             self.state = 0
             return
-        if attribute not in self.attributes_order_convention.keys():
+        if attribute not in list(self.attributes_order_convention.keys()):
             return
         new_state = self.attributes_order_convention[attribute]
         wrong_order = self.state > new_state
diff --git a/utils/checkpackagelib/lib_hash.py b/utils/checkpackagelib/lib_hash.py
index 6d4cc9fd62..8ca92a5a90 100644
--- a/utils/checkpackagelib/lib_hash.py
+++ b/utils/checkpackagelib/lib_hash.py
@@ -3,13 +3,14 @@ 
 # functions don't need to check for things already checked by running
 # "make package-dirclean package-source".
 
+from __future__ import absolute_import
 import re
 
-from base import _CheckFunction
-from lib import ConsecutiveEmptyLines  # noqa: F401
-from lib import EmptyLastLine          # noqa: F401
-from lib import NewlineAtEof           # noqa: F401
-from lib import TrailingSpace          # noqa: F401
+from .base import _CheckFunction
+from .lib import ConsecutiveEmptyLines  # noqa: F401
+from .lib import EmptyLastLine          # noqa: F401
+from .lib import NewlineAtEof           # noqa: F401
+from .lib import TrailingSpace          # noqa: F401
 
 
 def _empty_line_or_comment(text):
@@ -43,7 +44,7 @@  class HashType(_CheckFunction):
         htype, hexa = fields[:2]
         if htype == "none":
             return
-        if htype not in self.len_of_hash.keys():
+        if htype not in list(self.len_of_hash.keys()):
             return ["{}:{}: unexpected type of hash ({}#adding-packages-hash)"
                     .format(self.filename, lineno, self.url_to_manual),
                     text]
diff --git a/utils/checkpackagelib/lib_mk.py b/utils/checkpackagelib/lib_mk.py
index 86e9aa2d97..9c17a3cfd2 100644
--- a/utils/checkpackagelib/lib_mk.py
+++ b/utils/checkpackagelib/lib_mk.py
@@ -4,13 +4,14 @@ 
 # menu options using "make menuconfig" and by running "make" with appropriate
 # packages enabled.
 
+from __future__ import absolute_import
 import re
 
-from base import _CheckFunction
-from lib import ConsecutiveEmptyLines  # noqa: F401
-from lib import EmptyLastLine          # noqa: F401
-from lib import NewlineAtEof           # noqa: F401
-from lib import TrailingSpace          # noqa: F401
+from .base import _CheckFunction
+from .lib import ConsecutiveEmptyLines  # noqa: F401
+from .lib import EmptyLastLine          # noqa: F401
+from .lib import NewlineAtEof           # noqa: F401
+from .lib import TrailingSpace          # noqa: F401
 
 
 class Indent(_CheckFunction):
diff --git a/utils/checkpackagelib/lib_patch.py b/utils/checkpackagelib/lib_patch.py
index 555621afa1..4b0d32268f 100644
--- a/utils/checkpackagelib/lib_patch.py
+++ b/utils/checkpackagelib/lib_patch.py
@@ -3,10 +3,11 @@ 
 # functions don't need to check for things already checked by running
 # "make package-dirclean package-patch".
 
+from __future__ import absolute_import
 import re
 
-from base import _CheckFunction
-from lib import NewlineAtEof           # noqa: F401
+from .base import _CheckFunction
+from .lib import NewlineAtEof           # noqa: F401
 
 
 class ApplyOrder(_CheckFunction):