Message ID | 20180627021423.18404-7-ehabkost@redhat.com |
---|---|
State | New |
Headers | show |
Series | docker: Port to Python 3 | expand |
On 06/26/2018 09:14 PM, Eduardo Habkost wrote: > Instead of treating dockerfile contents as byte sequences, always > open dockerfiles in text mode and treat it as text. > > This is not strictly required to make the script compatible with > Python 3, but it's a simpler and safer way than opening > dockerfiles in binary mode and decoding the data data later. s/data data/data/ > > To make the code compatible with both Python 2 and 3, use > io.open(), which accepts a 'encoding' argument on both versions. How does this compare to the recent change to the QAPI generators in commit de685ae5e? Should we be trying to use similar mechanisms in both places? > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > --- > tests/docker/docker.py | 46 ++++++++++++++++++++++++------------------ > 1 file changed, 26 insertions(+), 20 deletions(-) > > diff --git a/tests/docker/docker.py b/tests/docker/docker.py > index f58af8e894..412a031c1c 100755 > --- a/tests/docker/docker.py > +++ b/tests/docker/docker.py > @@ -23,6 +23,7 @@ import argparse > import tempfile > import re > import signal > +import io > from tarfile import TarFile, TarInfo > from io import BytesIO > from shutil import copy, rmtree > @@ -30,7 +31,7 @@ from pwd import getpwuid > from datetime import datetime,timedelta > > try: > - from typing import List, Union, Tuple > + from typing import List, Union, Tuple, Text > except ImportError: > # needed only to make type annotations work > pass > @@ -52,13 +53,13 @@ def _fsdecode(name): > return name # type: ignore > > def _text_checksum(text): > - # type: (bytes) -> str > + # type: (Text) -> str > """Calculate a digest string unique to the text content""" > - return hashlib.sha1(text).hexdigest() > + return hashlib.sha1(text.encode('utf-8')).hexdigest() > > def _file_checksum(filename): > # type: (str) -> str > - return _text_checksum(open(filename, 'rb').read()) > + return _text_checksum(io.open(filename, 'r', encoding='utf-8').read()) > > def _guess_docker_command(): > # type: () -> List[str] > @@ -129,14 +130,14 @@ def _copy_binary_with_libs(src, dest_dir): > _copy_with_mkdir(l , dest_dir, so_path) > > def _read_qemu_dockerfile(img_name): > - # type: (str) -> str > + # type: (Text) -> str > df = os.path.join(os.path.dirname(__file__), "dockerfiles", > img_name + ".docker") > - return open(df, "r").read() > + return io.open(df, "r", encoding='utf-8').read() > > def _dockerfile_preprocess(df): > - # type: (str) -> str > - out = "" > + # type: (Text) -> Text > + out = u"" > for l in df.splitlines(): > if len(l.strip()) == 0 or l.startswith("#"): > continue > @@ -149,7 +150,7 @@ def _dockerfile_preprocess(df): > inlining = _read_qemu_dockerfile(l[len(from_pref):]) > out += _dockerfile_preprocess(inlining) > continue > - out += l + "\n" > + out += l + u"\n" > return out > > class Docker(object): > @@ -220,32 +221,37 @@ class Docker(object): > def build_image(self, > tag, # type: str > docker_dir, # type: str > - dockerfile, # type: str > + dockerfile, # type: Text > quiet=True, # type: bool > user=False, # type: bool > argv=[], # type: List[str] > extra_files_cksum=[] # List[Tuple[str, bytes]] > ): > # type(...) -> None > - tmp_df = tempfile.NamedTemporaryFile(dir=docker_dir, suffix=".docker") > + tmp_ndf = tempfile.NamedTemporaryFile(dir=docker_dir, suffix=".docker") > + # on Python 2.7, NamedTemporaryFile doesn't support encoding parameter, > + # so reopen it in text mode: > + tmp_df = io.open(tmp_ndf.name, mode='w+t', encoding='utf-8') > tmp_df.write(dockerfile) > > if user: > uid = os.getuid() > uname = getpwuid(uid).pw_name > - tmp_df.write("\n") > - tmp_df.write("RUN id %s 2>/dev/null || useradd -u %d -U %s" % > + tmp_df.write(u"\n") > + tmp_df.write(u"RUN id %s 2>/dev/null || useradd -u %d -U %s" % > (uname, uid, uname)) > > - tmp_df.write("\n") > - tmp_df.write("LABEL com.qemu.dockerfile-checksum=%s" % > - _text_checksum(_dockerfile_preprocess(dockerfile))) > + dockerfile = _dockerfile_preprocess(dockerfile) > + > + tmp_df.write(u"\n") > + tmp_df.write(u"LABEL com.qemu.dockerfile-checksum=%s" % > + _text_checksum(dockerfile)) > for f, c in extra_files_cksum: > - tmp_df.write("LABEL com.qemu.%s-checksum=%s" % (f, c)) > + tmp_df.write(u"LABEL com.qemu.%s-checksum=%s" % (f, c)) > > tmp_df.flush() > > - self._do_check(["build", "-t", tag, "-f", tmp_df.name] + argv + \ > + self._do_check(["build", "-t", tag, "-f", tmp_ndf.name] + argv + \ > [docker_dir], > quiet=quiet) > > @@ -326,7 +332,7 @@ class BuildCommand(SubCommand): > > def run(self, args, argv): > # type: (argparse.Namespace, List[str]) -> int > - dockerfile = open(args.dockerfile, "rb").read() > + dockerfile = io.open(args.dockerfile, "r", encoding='utf-8').read() > tag = args.tag > > dkr = Docker() > @@ -519,7 +525,7 @@ class CheckCommand(SubCommand): > print("Need a dockerfile for tag:%s" % (tag)) > return 1 > > - dockerfile = open(args.dockerfile, "rb").read() > + dockerfile = io.open(args.dockerfile, "r", encoding='utf-8').read() > > if dkr.image_matches_dockerfile(tag, dockerfile): > if not args.quiet: >
On Wed, Jun 27, 2018 at 07:51:01AM -0500, Eric Blake wrote: > On 06/26/2018 09:14 PM, Eduardo Habkost wrote: > > Instead of treating dockerfile contents as byte sequences, always > > open dockerfiles in text mode and treat it as text. > > > > This is not strictly required to make the script compatible with > > Python 3, but it's a simpler and safer way than opening > > dockerfiles in binary mode and decoding the data data later. > > s/data data/data/ Thanks. > > > > > To make the code compatible with both Python 2 and 3, use > > io.open(), which accepts a 'encoding' argument on both versions. > > How does this compare to the recent change to the QAPI generators in commit > de685ae5e? Should we be trying to use similar mechanisms in both places? We could do the same and use io.open(..., encoding='utf-8') unconditionally on QAPI too, but it may be harder because of its usage of insinstance() and the 'str' type[1] everywhere. One solution I considered on QAPI was using '__future__.unicode_literals' and the 'builtins.str' type from python-future, but I don't think QAPI's specific case justify adding a new build dependency on Python 2.7 hosts. Adding type annotations to the QAPI code may help us sort out the mess more easily. [1] 'str' on Python 2.7 is more similar to the 'bytes' type on Python 3, but io.open(..., encoding=...) on Python 2.7 returns 'unicode' objects (that that are more similar to the native 'str' type from Python 3).
diff --git a/tests/docker/docker.py b/tests/docker/docker.py index f58af8e894..412a031c1c 100755 --- a/tests/docker/docker.py +++ b/tests/docker/docker.py @@ -23,6 +23,7 @@ import argparse import tempfile import re import signal +import io from tarfile import TarFile, TarInfo from io import BytesIO from shutil import copy, rmtree @@ -30,7 +31,7 @@ from pwd import getpwuid from datetime import datetime,timedelta try: - from typing import List, Union, Tuple + from typing import List, Union, Tuple, Text except ImportError: # needed only to make type annotations work pass @@ -52,13 +53,13 @@ def _fsdecode(name): return name # type: ignore def _text_checksum(text): - # type: (bytes) -> str + # type: (Text) -> str """Calculate a digest string unique to the text content""" - return hashlib.sha1(text).hexdigest() + return hashlib.sha1(text.encode('utf-8')).hexdigest() def _file_checksum(filename): # type: (str) -> str - return _text_checksum(open(filename, 'rb').read()) + return _text_checksum(io.open(filename, 'r', encoding='utf-8').read()) def _guess_docker_command(): # type: () -> List[str] @@ -129,14 +130,14 @@ def _copy_binary_with_libs(src, dest_dir): _copy_with_mkdir(l , dest_dir, so_path) def _read_qemu_dockerfile(img_name): - # type: (str) -> str + # type: (Text) -> str df = os.path.join(os.path.dirname(__file__), "dockerfiles", img_name + ".docker") - return open(df, "r").read() + return io.open(df, "r", encoding='utf-8').read() def _dockerfile_preprocess(df): - # type: (str) -> str - out = "" + # type: (Text) -> Text + out = u"" for l in df.splitlines(): if len(l.strip()) == 0 or l.startswith("#"): continue @@ -149,7 +150,7 @@ def _dockerfile_preprocess(df): inlining = _read_qemu_dockerfile(l[len(from_pref):]) out += _dockerfile_preprocess(inlining) continue - out += l + "\n" + out += l + u"\n" return out class Docker(object): @@ -220,32 +221,37 @@ class Docker(object): def build_image(self, tag, # type: str docker_dir, # type: str - dockerfile, # type: str + dockerfile, # type: Text quiet=True, # type: bool user=False, # type: bool argv=[], # type: List[str] extra_files_cksum=[] # List[Tuple[str, bytes]] ): # type(...) -> None - tmp_df = tempfile.NamedTemporaryFile(dir=docker_dir, suffix=".docker") + tmp_ndf = tempfile.NamedTemporaryFile(dir=docker_dir, suffix=".docker") + # on Python 2.7, NamedTemporaryFile doesn't support encoding parameter, + # so reopen it in text mode: + tmp_df = io.open(tmp_ndf.name, mode='w+t', encoding='utf-8') tmp_df.write(dockerfile) if user: uid = os.getuid() uname = getpwuid(uid).pw_name - tmp_df.write("\n") - tmp_df.write("RUN id %s 2>/dev/null || useradd -u %d -U %s" % + tmp_df.write(u"\n") + tmp_df.write(u"RUN id %s 2>/dev/null || useradd -u %d -U %s" % (uname, uid, uname)) - tmp_df.write("\n") - tmp_df.write("LABEL com.qemu.dockerfile-checksum=%s" % - _text_checksum(_dockerfile_preprocess(dockerfile))) + dockerfile = _dockerfile_preprocess(dockerfile) + + tmp_df.write(u"\n") + tmp_df.write(u"LABEL com.qemu.dockerfile-checksum=%s" % + _text_checksum(dockerfile)) for f, c in extra_files_cksum: - tmp_df.write("LABEL com.qemu.%s-checksum=%s" % (f, c)) + tmp_df.write(u"LABEL com.qemu.%s-checksum=%s" % (f, c)) tmp_df.flush() - self._do_check(["build", "-t", tag, "-f", tmp_df.name] + argv + \ + self._do_check(["build", "-t", tag, "-f", tmp_ndf.name] + argv + \ [docker_dir], quiet=quiet) @@ -326,7 +332,7 @@ class BuildCommand(SubCommand): def run(self, args, argv): # type: (argparse.Namespace, List[str]) -> int - dockerfile = open(args.dockerfile, "rb").read() + dockerfile = io.open(args.dockerfile, "r", encoding='utf-8').read() tag = args.tag dkr = Docker() @@ -519,7 +525,7 @@ class CheckCommand(SubCommand): print("Need a dockerfile for tag:%s" % (tag)) return 1 - dockerfile = open(args.dockerfile, "rb").read() + dockerfile = io.open(args.dockerfile, "r", encoding='utf-8').read() if dkr.image_matches_dockerfile(tag, dockerfile): if not args.quiet:
Instead of treating dockerfile contents as byte sequences, always open dockerfiles in text mode and treat it as text. This is not strictly required to make the script compatible with Python 3, but it's a simpler and safer way than opening dockerfiles in binary mode and decoding the data data later. To make the code compatible with both Python 2 and 3, use io.open(), which accepts a 'encoding' argument on both versions. Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- tests/docker/docker.py | 46 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-)