diff mbox series

[v2,2/2] qapi: open files in binary mode and use explicit decoding/encoding in common.py

Message ID 20180615044042.7928-3-tamiko@43-1.org
State New
Headers show
Series Fix compilation with python-3 if en_US.UTF-8 is unavailable | expand

Commit Message

Matthias Maier June 15, 2018, 4:40 a.m. UTC
This is a different approach to fixing the locale-dependent encode/decode
problem in common.py, using binary read/write mode [1,2] and
decode/encode with explicit UTF-8 encoding arguments [3].

This approach is preferred over the fix in commit d4e5ec877ca because it
(a) is locale-independent, and (b) does not depend on the en_US.UTF-8
locale being available.

[1] https://docs.python.org/3.6/library/stdtypes.html#bytes.decode
[2] https://docs.python.org/3.6/library/stdtypes.html#str.encode
[3] https://docs.python.org/3/howto/unicode.html

Signed-off-by: Arfrever Frehtes Taifersar Arahesis <arfrever.fta@gmail.com>
Signed-off-by: Matthias Maier <tamiko@43-1.org>
---
 scripts/qapi/common.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

Comments

Markus Armbruster June 15, 2018, 3:31 p.m. UTC | #1
Matthias Maier <tamiko@43-1.org> writes:

> This is a different approach to fix the locale dependent encode/decode
> problem in common.py utilizing the binary read/write mode [1,2] and
> decode/encode with explicit UTF-8 encoding arguments [3].

Why can't we simply pass encoding='utf-8' to open()?

> This approach is preferred over the fix in commit d4e5ec877ca because it
> is (a) locale independent, and (b) does not depend on the en_US.UTF_8
> locale to be available.
>
> [1] https://docs.python.org/3.6/library/stdtypes.html#bytes.decode
> [2] https://docs.python.org/3.6/library/stdtypes.html#str.encode
> [3] https://docs.python.org/3/howto/unicode.html
>
> Signed-off-by: Arfrever Frehtes Taifersar Arahesis <arfrever.fta@gmail.com>
> Signed-off-by: Matthias Maier <tamiko@43-1.org>
> ---
>  scripts/qapi/common.py | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py
> index 2462fc0291..44270cd703 100644
> --- a/scripts/qapi/common.py
> +++ b/scripts/qapi/common.py
> @@ -16,6 +16,7 @@ import errno
>  import os
>  import re
>  import string
> +import sys
>  from collections import OrderedDict
>  
>  builtin_types = {
> @@ -259,6 +260,8 @@ class QAPISchemaParser(object):
>          previously_included.append(os.path.abspath(fp.name))
>          self.incl_info = incl_info
>          self.src = fp.read()
> +        if sys.version_info[0] >= 3:
> +            self.src = self.src.decode("UTF-8")

If I understand 7.2.3. Standard Encodings[*] correctly, the canonical
name is "utf-8".  Let's use that.  Wait, it's the default, no need to
pass an argument.

>          if self.src == '' or self.src[-1] != '\n':
>              self.src += '\n'
>          self.cursor = 0
> @@ -340,7 +343,7 @@ class QAPISchemaParser(object):
>              return None
>  
>          try:
> -            fobj = open(incl_fname, 'r')
> +            fobj = open(incl_fname, 'rb')
>          except IOError as e:
>              raise QAPISemError(info, '%s: %s' % (e.strerror, incl_fname))
>          return QAPISchemaParser(fobj, previously_included, info)
> @@ -1492,7 +1495,7 @@ class QAPISchemaEvent(QAPISchemaEntity):
>  class QAPISchema(object):
>      def __init__(self, fname):
>          self._fname = fname
> -        parser = QAPISchemaParser(open(fname, 'r'))
> +        parser = QAPISchemaParser(open(fname, 'rb'))
>          exprs = check_exprs(parser.exprs)
>          self.docs = parser.docs
>          self._entity_list = []
> @@ -2006,9 +2009,11 @@ class QAPIGen(object):
>                  if e.errno != errno.EEXIST:
>                      raise
>          fd = os.open(pathname, os.O_RDWR | os.O_CREAT, 0o666)
> -        f = os.fdopen(fd, 'r+')
> +        f = os.fdopen(fd, 'r+b')
>          text = (self._top(fname) + self._preamble + self._body
>                  + self._bottom(fname))
> +        if sys.version_info[0] >= 3:
> +            text = text.encode("UTF-8")

Likewise.

>          oldtext = f.read(len(text) + 1)
>          if text != oldtext:
>              f.seek(0)

[*] https://docs.python.org/3/library/codecs.html#standard-encodings
Matthias Maier June 15, 2018, 9:55 p.m. UTC | #2
On Fri, Jun 15, 2018, at 10:31 CDT, Markus Armbruster <armbru@redhat.com> wrote:

> If I understand 7.2.3. Standard Encodings[*] correctly, the canonical
> name is "utf-8".  Let's use that.  Wait, it's the default, no need to
> pass an argument.

Roger. I will change this in v3.
diff mbox series

Patch

diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py
index 2462fc0291..44270cd703 100644
--- a/scripts/qapi/common.py
+++ b/scripts/qapi/common.py
@@ -16,6 +16,7 @@  import errno
 import os
 import re
 import string
+import sys
 from collections import OrderedDict
 
 builtin_types = {
@@ -259,6 +260,8 @@  class QAPISchemaParser(object):
         previously_included.append(os.path.abspath(fp.name))
         self.incl_info = incl_info
         self.src = fp.read()
+        if sys.version_info[0] >= 3:
+            self.src = self.src.decode("UTF-8")
         if self.src == '' or self.src[-1] != '\n':
             self.src += '\n'
         self.cursor = 0
@@ -340,7 +343,7 @@  class QAPISchemaParser(object):
             return None
 
         try:
-            fobj = open(incl_fname, 'r')
+            fobj = open(incl_fname, 'rb')
         except IOError as e:
             raise QAPISemError(info, '%s: %s' % (e.strerror, incl_fname))
         return QAPISchemaParser(fobj, previously_included, info)
@@ -1492,7 +1495,7 @@  class QAPISchemaEvent(QAPISchemaEntity):
 class QAPISchema(object):
     def __init__(self, fname):
         self._fname = fname
-        parser = QAPISchemaParser(open(fname, 'r'))
+        parser = QAPISchemaParser(open(fname, 'rb'))
         exprs = check_exprs(parser.exprs)
         self.docs = parser.docs
         self._entity_list = []
@@ -2006,9 +2009,11 @@  class QAPIGen(object):
                 if e.errno != errno.EEXIST:
                     raise
         fd = os.open(pathname, os.O_RDWR | os.O_CREAT, 0o666)
-        f = os.fdopen(fd, 'r+')
+        f = os.fdopen(fd, 'r+b')
         text = (self._top(fname) + self._preamble + self._body
                 + self._bottom(fname))
+        if sys.version_info[0] >= 3:
+            text = text.encode("UTF-8")
         oldtext = f.read(len(text) + 1)
         if text != oldtext:
             f.seek(0)