diff mbox series

[4/4] buildman: Use bytes for the environment

Message ID 20210411162720.4.I7de274bda337a2f2c3cb952c78aeda3f2155d521@changeid
State Accepted
Delegated to: Simon Glass
Headers show
Series buildman: Deal with unicode errors and thread exceptions | expand

Commit Message

Simon Glass April 11, 2021, 4:27 a.m. UTC
At present we sometimes see problems in gitlab where the environment has
0x80 characters or sequences which are not valid UTF-8.

Avoid this by using bytes for the environment, both internal to buildman
and when writing out the 'env' file. Add a test to make sure this works
as expected.

Reported-by: Marek Vasut <marex@denx.de>
Fixes: e5fc79ea718 ("buildman: Write the environment out to an 'env' file")
Signed-off-by: Simon Glass <sjg@chromium.org>
---

 tools/buildman/builderthread.py |  5 ++---
 tools/buildman/func_test.py     | 12 ++++++++++++
 tools/buildman/toolchain.py     | 24 ++++++++++++++++--------
 3 files changed, 30 insertions(+), 11 deletions(-)

Comments

Simon Glass April 29, 2021, 4:03 p.m. UTC | #1
At present we sometimes see problems in gitlab where the environment has
0x80 characters or sequences which are not valid UTF-8.

Avoid this by using bytes for the environment, both internal to buildman
and when writing out the 'env' file. Add a test to make sure this works
as expected.

Reported-by: Marek Vasut <marex@denx.de>
Fixes: e5fc79ea718 ("buildman: Write the environment out to an 'env' file")
Signed-off-by: Simon Glass <sjg@chromium.org>
---

 tools/buildman/builderthread.py |  5 ++---
 tools/buildman/func_test.py     | 12 ++++++++++++
 tools/buildman/toolchain.py     | 24 ++++++++++++++++--------
 3 files changed, 30 insertions(+), 11 deletions(-)

Applied to u-boot-dm, thanks!
diff mbox series

Patch

diff --git a/tools/buildman/builderthread.py b/tools/buildman/builderthread.py
index ddb3eab8c03..48128cf6732 100644
--- a/tools/buildman/builderthread.py
+++ b/tools/buildman/builderthread.py
@@ -351,10 +351,9 @@  class BuilderThread(threading.Thread):
 
             # Write out the image and function size information and an objdump
             env = result.toolchain.MakeEnvironment(self.builder.full_path)
-            with open(os.path.join(build_dir, 'out-env'), 'w',
-                      encoding='utf-8') as fd:
+            with open(os.path.join(build_dir, 'out-env'), 'wb') as fd:
                 for var in sorted(env.keys()):
-                    print('%s="%s"' % (var, env[var]), file=fd)
+                    fd.write(b'%s="%s"' % (var, env[var]))
             lines = []
             for fname in BASE_ELF_FILENAMES:
                 cmd = ['%snm' % self.toolchain.cross, '--size-sort', fname]
diff --git a/tools/buildman/func_test.py b/tools/buildman/func_test.py
index 61e3012d2b1..7edbee0652f 100644
--- a/tools/buildman/func_test.py
+++ b/tools/buildman/func_test.py
@@ -572,6 +572,18 @@  class TestFunctional(unittest.TestCase):
         self.assertTrue(os.path.exists(os.path.join(board0_dir, 'done')))
         self.assertTrue(os.path.exists(os.path.join(board0_dir, 'out-env')))
 
+    def testEnvironmentUnicode(self):
+        """Test there are no unicode errors when the env has non-ASCII chars"""
+        try:
+            varname = b'buildman_test_var'
+            os.environb[varname] = b'strange\x80chars'
+            self.assertEqual(0, self._RunControl('-o', self._output_dir))
+            board0_dir = os.path.join(self._output_dir, 'current', 'board0')
+            self.assertTrue(os.path.exists(os.path.join(board0_dir, 'done')))
+            self.assertTrue(os.path.exists(os.path.join(board0_dir, 'out-env')))
+        finally:
+            del os.environb[varname]
+
     def testWorkInOutput(self):
         """Test the -w option which should write directly to the output dir"""
         board_list = board.Boards()
diff --git a/tools/buildman/toolchain.py b/tools/buildman/toolchain.py
index acb5a29c8fb..fd137f7300e 100644
--- a/tools/buildman/toolchain.py
+++ b/tools/buildman/toolchain.py
@@ -179,27 +179,35 @@  class Toolchain:
         output and possibly unicode encoded output of all build tools by
         adding LC_ALL=C.
 
+        Note that os.environb is used to obtain the environment, since in some
+        cases the environment many contain non-ASCII characters and we see
+        errors like:
+
+          UnicodeEncodeError: 'utf-8' codec can't encode characters in position
+             569-570: surrogates not allowed
+
         Args:
             full_path: Return the full path in CROSS_COMPILE and don't set
                 PATH
         Returns:
-            Dict containing the environemnt to use. This is based on the current
-            environment, with changes as needed to CROSS_COMPILE, PATH and
-            LC_ALL.
+            Dict containing the (bytes) environment to use. This is based on the
+            current environment, with changes as needed to CROSS_COMPILE, PATH
+            and LC_ALL.
         """
-        env = dict(os.environ)
+        env = dict(os.environb)
         wrapper = self.GetWrapper()
 
         if self.override_toolchain:
             # We'll use MakeArgs() to provide this
             pass
         elif full_path:
-            env['CROSS_COMPILE'] = wrapper + os.path.join(self.path, self.cross)
+            env[b'CROSS_COMPILE'] = tools.ToBytes(
+                wrapper + os.path.join(self.path, self.cross))
         else:
-            env['CROSS_COMPILE'] = wrapper + self.cross
-            env['PATH'] = self.path + ':' + env['PATH']
+            env[b'CROSS_COMPILE'] = tools.ToBytes(wrapper + self.cross)
+            env[b'PATH'] = tools.ToBytes(self.path) + b':' + env[b'PATH']
 
-        env['LC_ALL'] = 'C'
+        env[b'LC_ALL'] = b'C'
 
         return env