diff mbox series

[16/25] binman: Avoid reporting image-pos with compression

Message ID 20201019024138.3804540-16-sjg@chromium.org
State Superseded
Delegated to: Simon Glass
Headers show
Series binman: Support compression of sections | expand

Commit Message

Simon Glass Oct. 19, 2020, 2:41 a.m. UTC
When a section is compressed, all entries within it are grouped together
into a compressed block of data. This obscures the start of each
individual child entry.

Avoid reporting bogus 'image-pos' properties in this case, since it is
not possible to access the entry at the location provided. The entire
section must be decompressed first.

CBFS does not support compressing whole sections, only individual files,
so needs no special handling here.

Signed-off-by: Simon Glass <sjg@chromium.org>
---

 tools/binman/control.py       |  2 +-
 tools/binman/entry.py         | 18 ++++++++++++++----
 tools/binman/etype/cbfs.py    |  6 +++---
 tools/binman/etype/section.py | 13 ++++++++-----
 4 files changed, 26 insertions(+), 13 deletions(-)

Comments

Alper Nebi Yasak Oct. 19, 2020, 9:15 p.m. UTC | #1
On 19/10/2020 05:41, Simon Glass wrote:
> When a section is compressed, all entries within it are grouped together
> into a compressed block of data. This obscures the start of each
> individual child entry.
> 
> Avoid reporting bogus 'image-pos' properties in this case, since it is
> not possible to access the entry at the location provided. The entire
> section must be decompressed first.
> 
> CBFS does not support compressing whole sections, only individual files,
> so needs no special handling here.
> 
> Signed-off-by: Simon Glass <sjg@chromium.org>
> ---

Maybe instead of this, 'image-pos' could be overloaded to a list of
recursive relative positions within compressed archives? Something like:

    section {
        offset = <1000>;
        compress = "lz4";
        /* image-pos = <1000>; */

        blob {
            filename = "foo";
            offset = <100>;
            /* image-pos = <1000>, <100>; */
        };
    };

    cbfs {
        offset = <2000>;
        /* image-pos = <2000>; */

        blob {
            filename = "bar";
            cbfs-compress = "lz4";
            cbfs-offset = <200>;
            /* image-pos = <2200>, <0>; */
        };

        blob2 {
            filename = "baz";
            cbfs-offset = <800>;
            /* image-pos = <2800>; */
        };
    };

>  tools/binman/control.py       |  2 +-
>  tools/binman/entry.py         | 18 ++++++++++++++----
>  tools/binman/etype/cbfs.py    |  6 +++---
>  tools/binman/etype/section.py | 13 ++++++++-----
>  4 files changed, 26 insertions(+), 13 deletions(-)
> 
> diff --git a/tools/binman/control.py b/tools/binman/control.py
> index ee5771e7292..26f1cf462ec 100644
> --- a/tools/binman/control.py
> +++ b/tools/binman/control.py
> @@ -462,7 +462,7 @@ def PrepareImagesAndDtbs(dtb_fname, select_images, update_fdt):
>      for image in images.values():
>          image.ExpandEntries()
>          if update_fdt:
> -            image.AddMissingProperties()
> +            image.AddMissingProperties(True)
>          image.ProcessFdt(dtb)
>  
>      for dtb_item in state.GetAllFdts():
> diff --git a/tools/binman/entry.py b/tools/binman/entry.py
> index 01a5fde84ed..8fa1dcef2da 100644
> --- a/tools/binman/entry.py
> +++ b/tools/binman/entry.py
> @@ -213,11 +213,20 @@ class Entry(object):
>      def ExpandEntries(self):
>          pass
>  
> -    def AddMissingProperties(self):
> -        """Add new properties to the device tree as needed for this entry"""
> -        for prop in ['offset', 'size', 'image-pos']:
> +    def AddMissingProperties(self, have_image_pos):
> +        """Add new properties to the device tree as needed for this entry
> +
> +        Args:
> +            have_image_pos: True if this entry has an image position. This can
> +                be False if its parent section is compressed, since compression
> +                groups all entries together into a compressed block of data,
> +                obscuring the start of each individual child entry
> +        """
> +        for prop in ['offset', 'size']:
>              if not prop in self._node.props:
>                  state.AddZeroProp(self._node, prop)
> +        if have_image_pos and 'image-pos' not in self._node.props:
> +            state.AddZeroProp(self._node, 'image-pos')
>          if self.GetImage().allow_repack:
>              if self.orig_offset is not None:
>                  state.AddZeroProp(self._node, 'orig-offset', True)
> @@ -235,7 +244,8 @@ class Entry(object):
>          state.SetInt(self._node, 'offset', self.offset)
>          state.SetInt(self._node, 'size', self.size)
>          base = self.section.GetRootSkipAtStart() if self.section else 0
> -        state.SetInt(self._node, 'image-pos', self.image_pos - base)
> +        if self.image_pos is not None:
> +            state.SetInt(self._node, 'image-pos', self.image_pos)
>          if self.GetImage().allow_repack:
>              if self.orig_offset is not None:
>                  state.SetInt(self._node, 'orig-offset', self.orig_offset, True)
> diff --git a/tools/binman/etype/cbfs.py b/tools/binman/etype/cbfs.py
> index 650ab2c292f..6cdbaa085f5 100644
> --- a/tools/binman/etype/cbfs.py
> +++ b/tools/binman/etype/cbfs.py
> @@ -237,10 +237,10 @@ class Entry_cbfs(Entry):
>              if entry._cbfs_compress:
>                  entry.uncomp_size = cfile.memlen
>  
> -    def AddMissingProperties(self):
> -        super().AddMissingProperties()
> +    def AddMissingProperties(self, have_image_pos):
> +        super().AddMissingProperties(have_image_pos)
>          for entry in self._cbfs_entries.values():
> -            entry.AddMissingProperties()
> +            entry.AddMissingProperties(have_image_pos)
>              if entry._cbfs_compress:
>                  state.AddZeroProp(entry._node, 'uncomp-size')
>                  # Store the 'compress' property, since we don't look at
> diff --git a/tools/binman/etype/section.py b/tools/binman/etype/section.py
> index 6e6f6749727..2812989ba1a 100644
> --- a/tools/binman/etype/section.py
> +++ b/tools/binman/etype/section.py
> @@ -136,11 +136,13 @@ class Entry_section(Entry):
>          for entry in self._entries.values():
>              entry.ExpandEntries()
>  
> -    def AddMissingProperties(self):
> +    def AddMissingProperties(self, have_image_pos):
>          """Add new properties to the device tree as needed for this entry"""
> -        super().AddMissingProperties()
> +        super().AddMissingProperties(have_image_pos)
> +        if self.compress != 'none':
> +            have_image_pos = False
>          for entry in self._entries.values():
> -            entry.AddMissingProperties()
> +            entry.AddMissingProperties(have_image_pos)
>  
>      def ObtainContents(self):
>          return self.GetEntryContents()
> @@ -323,8 +325,9 @@ class Entry_section(Entry):
>  
>      def SetImagePos(self, image_pos):
>          super().SetImagePos(image_pos)
> -        for entry in self._entries.values():
> -            entry.SetImagePos(image_pos + self.offset)
> +        if self.compress == 'none':
> +            for entry in self._entries.values():
> +                entry.SetImagePos(image_pos + self.offset)
>  
>      def ProcessContents(self):
>          sizes_ok_base = super(Entry_section, self).ProcessContents()
>
Simon Glass Oct. 26, 2020, 7:22 p.m. UTC | #2
Hi Alper,

On Mon, 19 Oct 2020 at 15:29, Alper Nebi Yasak <alpernebiyasak@gmail.com> wrote:
>
> On 19/10/2020 05:41, Simon Glass wrote:
> > When a section is compressed, all entries within it are grouped together
> > into a compressed block of data. This obscures the start of each
> > individual child entry.
> >
> > Avoid reporting bogus 'image-pos' properties in this case, since it is
> > not possible to access the entry at the location provided. The entire
> > section must be decompressed first.
> >
> > CBFS does not support compressing whole sections, only individual files,
> > so needs no special handling here.
> >
> > Signed-off-by: Simon Glass <sjg@chromium.org>
> > ---
>
> Maybe instead of this, 'image-pos' could be overloaded to a list of
> recursive relative positions within compressed archives? Something like:
>
>     section {
>         offset = <1000>;
>         compress = "lz4";
>         /* image-pos = <1000>; */
>
>         blob {
>             filename = "foo";
>             offset = <100>;
>             /* image-pos = <1000>, <100>; */
>         };
>     };
>
>     cbfs {
>         offset = <2000>;
>         /* image-pos = <2000>; */
>
>         blob {
>             filename = "bar";
>             cbfs-compress = "lz4";
>             cbfs-offset = <200>;
>             /* image-pos = <2200>, <0>; */
>         };
>
>         blob2 {
>             filename = "baz";
>             cbfs-offset = <800>;
>             /* image-pos = <2800>; */
>         };
>     };
>

What do these values actually mean though? What would we use them for?

Note that CBFS compresses its data on a per-entry basis so the
image-pos is actually supported.

[..]

Regards,
Simon
Alper Nebi Yasak Oct. 26, 2020, 10:25 p.m. UTC | #3
On 26/10/2020 22:22, Simon Glass wrote:
> Hi Alper,
> 
> On Mon, 19 Oct 2020 at 15:29, Alper Nebi Yasak <alpernebiyasak@gmail.com> wrote:
>>
>> On 19/10/2020 05:41, Simon Glass wrote:
>>> When a section is compressed, all entries within it are grouped together
>>> into a compressed block of data. This obscures the start of each
>>> individual child entry.
>>>
>>> Avoid reporting bogus 'image-pos' properties in this case, since it is
>>> not possible to access the entry at the location provided. The entire
>>> section must be decompressed first.
>>>
>>> CBFS does not support compressing whole sections, only individual files,
>>> so needs no special handling here.
>>>
>>> Signed-off-by: Simon Glass <sjg@chromium.org>
>>> ---
>>
>> Maybe instead of this, 'image-pos' could be overloaded to a list of
>> recursive relative positions within compressed archives? Something like:
>>
>>     section {
>>         offset = <1000>;
>>         compress = "lz4";
>>         /* image-pos = <1000>; */
>>
>>         blob {
>>             filename = "foo";
>>             offset = <100>;
>>             /* image-pos = <1000>, <100>; */
>>         };
>>     };
>>
>>     cbfs {
>>         offset = <2000>;
>>         /* image-pos = <2000>; */
>>
>>         blob {
>>             filename = "bar";
>>             cbfs-compress = "lz4";
>>             cbfs-offset = <200>;
>>             /* image-pos = <2200>, <0>; */
>>         };
>>
>>         blob2 {
>>             filename = "baz";
>>             cbfs-offset = <800>;
>>             /* image-pos = <2800>; */
>>         };
>>     };
>>
> 
> What do these values actually mean though? What would we use them for?

What I meant is using pairs of <position-of-compressed-archive>,
<position-in-uncompressed-data> to avoid losing position information of
compressed entries, but honestly I'm not sure if any of this will be
necessary. I think the single use would be to avoid parsing uncompressed
data containing multiple entries to identify what is where.

To get to an entry in a compressed section we could get "size" bytes of
data starting from "image-pos[0]", decompress that to somewhere, then
get "uncomp-size" bytes of data starting from decompression address +
"image-pos[1]".  I think it can even be extended to multiple layers of
compression (<pos-of-x>, <pos-of-y-in-d(x)>, <pos-of-z-in-d(y)>, ...).

> Note that CBFS compresses its data on a per-entry basis so the
> image-pos is actually supported.

On the compressed cbfs example, I assume the data we currently get at
image-pos = <2200> is compressed data and still needs to be decompressed
for it to be useable (technically bogus?). If so, I'd go a level deeper
by adding a <0> indicating the start of the uncompressed data is the
actual position of the entry contents.
Simon Glass Oct. 30, 2020, 6:15 p.m. UTC | #4
Hi Alper,

On Mon, 26 Oct 2020 at 17:20, Alper Nebi Yasak <alpernebiyasak@gmail.com> wrote:
>
> On 26/10/2020 22:22, Simon Glass wrote:
> > Hi Alper,
> >
> > On Mon, 19 Oct 2020 at 15:29, Alper Nebi Yasak <alpernebiyasak@gmail.com> wrote:
> >>
> >> On 19/10/2020 05:41, Simon Glass wrote:
> >>> When a section is compressed, all entries within it are grouped together
> >>> into a compressed block of data. This obscures the start of each
> >>> individual child entry.
> >>>
> >>> Avoid reporting bogus 'image-pos' properties in this case, since it is
> >>> not possible to access the entry at the location provided. The entire
> >>> section must be decompressed first.
> >>>
> >>> CBFS does not support compressing whole sections, only individual files,
> >>> so needs no special handling here.
> >>>
> >>> Signed-off-by: Simon Glass <sjg@chromium.org>
> >>> ---
> >>
> >> Maybe instead of this, 'image-pos' could be overloaded to a list of
> >> recursive relative positions within compressed archives? Something like:
> >>
> >>     section {
> >>         offset = <1000>;
> >>         compress = "lz4";
> >>         /* image-pos = <1000>; */
> >>
> >>         blob {
> >>             filename = "foo";
> >>             offset = <100>;
> >>             /* image-pos = <1000>, <100>; */
> >>         };
> >>     };
> >>
> >>     cbfs {
> >>         offset = <2000>;
> >>         /* image-pos = <2000>; */
> >>
> >>         blob {
> >>             filename = "bar";
> >>             cbfs-compress = "lz4";
> >>             cbfs-offset = <200>;
> >>             /* image-pos = <2200>, <0>; */
> >>         };
> >>
> >>         blob2 {
> >>             filename = "baz";
> >>             cbfs-offset = <800>;
> >>             /* image-pos = <2800>; */
> >>         };
> >>     };
> >>
> >
> > What do these values actually mean though? What would we use them for?
>
> What I meant is using pairs of <position-of-compressed-archive>,
> <position-in-uncompressed-data> to avoid losing position information of
> compressed entries, but honestly I'm not sure if any of this will be
> necessary. I think the single use would be to avoid parsing uncompressed
> data containing multiple entries to identify what is where.
>
> To get to an entry in a compressed section we could get "size" bytes of
> data starting from "image-pos[0]", decompress that to somewhere, then
> get "uncomp-size" bytes of data starting from decompression address +
> "image-pos[1]".  I think it can even be extended to multiple layers of
> compression (<pos-of-x>, <pos-of-y-in-d(x)>, <pos-of-z-in-d(y)>, ...).
>
> > Note that CBFS compresses its data on a per-entry basis so the
> > image-pos is actually supported.
>
> On the compressed cbfs example, I assume the data we currently get at
> image-pos = <2200> is compressed data and still needs to be decompressed
> for it to be useable (technically bogus?). If so, I'd go a level deeper
> by adding a <0> indicating the start of the uncompressed data is the
> actual position of the entry contents.

For the CBFS case there isn't anything needed I think.

We already have an offset field which tells you where something is in
the compressed data. For the case where a compressed section has just
other entries in it, the offset is enough.

For the case where the compressed section has other uncompressed
sections, we need to add together the offsets (uncompressed section +
its entry) to get to the compressed location. It certainly has some
benefits.

But I wonder if instead we should have a properly like comp-pos that
tells you the absolute offset within the compressed section? I'm not a
big fan of making image-pos a multi-cell value.

Regards,
Simon
Alper Nebi Yasak Nov. 4, 2020, 7:45 p.m. UTC | #5
On 30/10/2020 21:15, Simon Glass wrote:
> Hi Alper,
> 
> On Mon, 26 Oct 2020 at 17:20, Alper Nebi Yasak <alpernebiyasak@gmail.com> wrote:
>> What I meant is using pairs of <position-of-compressed-archive>,
>> <position-in-uncompressed-data> to avoid losing position information of
>> compressed entries, but honestly I'm not sure if any of this will be
>> necessary. I think the single use would be to avoid parsing uncompressed
>> data containing multiple entries to identify what is where.
> 
> For the CBFS case there isn't anything needed I think.
> 
> We already have an offset field which tells you where something is in
> the compressed data. For the case where a compressed section has just
> other entries in it, the offset is enough.

That completely slipped my mind. I haven't got a good grasp on all of
this yet and it shows :)

So it's possible to just traverse down the tree, keep adding offsets,
decompress any encountered compressed entry, then keep adding offsets
onto where it's decompressed, and so on.

> For the case where the compressed section has other uncompressed
> sections, we need to add together the offsets (uncompressed section +
> its entry) to get to the compressed location. It certainly has some
> benefits.
> 
> But I wonder if instead we should have a properly like comp-pos that
> tells you the absolute offset within the compressed section? I'm not a
> big fan of making image-pos a multi-cell value.

Then, image-pos is by definition the sum of offsets from the topmost
parent (image) downto the entry? If so, that comp-pos would be the same
thing but from the closest compressed parent (or simply only support one
layer of compression).

I can't think of anything specific that would need this anyway, but I
guess one compression should be enough for most if not all cases.
diff mbox series

Patch

diff --git a/tools/binman/control.py b/tools/binman/control.py
index ee5771e7292..26f1cf462ec 100644
--- a/tools/binman/control.py
+++ b/tools/binman/control.py
@@ -462,7 +462,7 @@  def PrepareImagesAndDtbs(dtb_fname, select_images, update_fdt):
     for image in images.values():
         image.ExpandEntries()
         if update_fdt:
-            image.AddMissingProperties()
+            image.AddMissingProperties(True)
         image.ProcessFdt(dtb)
 
     for dtb_item in state.GetAllFdts():
diff --git a/tools/binman/entry.py b/tools/binman/entry.py
index 01a5fde84ed..8fa1dcef2da 100644
--- a/tools/binman/entry.py
+++ b/tools/binman/entry.py
@@ -213,11 +213,20 @@  class Entry(object):
     def ExpandEntries(self):
         pass
 
-    def AddMissingProperties(self):
-        """Add new properties to the device tree as needed for this entry"""
-        for prop in ['offset', 'size', 'image-pos']:
+    def AddMissingProperties(self, have_image_pos):
+        """Add new properties to the device tree as needed for this entry
+
+        Args:
+            have_image_pos: True if this entry has an image position. This can
+                be False if its parent section is compressed, since compression
+                groups all entries together into a compressed block of data,
+                obscuring the start of each individual child entry
+        """
+        for prop in ['offset', 'size']:
             if not prop in self._node.props:
                 state.AddZeroProp(self._node, prop)
+        if have_image_pos and 'image-pos' not in self._node.props:
+            state.AddZeroProp(self._node, 'image-pos')
         if self.GetImage().allow_repack:
             if self.orig_offset is not None:
                 state.AddZeroProp(self._node, 'orig-offset', True)
@@ -235,7 +244,8 @@  class Entry(object):
         state.SetInt(self._node, 'offset', self.offset)
         state.SetInt(self._node, 'size', self.size)
         base = self.section.GetRootSkipAtStart() if self.section else 0
-        state.SetInt(self._node, 'image-pos', self.image_pos - base)
+        if self.image_pos is not None:
+            state.SetInt(self._node, 'image-pos', self.image_pos)
         if self.GetImage().allow_repack:
             if self.orig_offset is not None:
                 state.SetInt(self._node, 'orig-offset', self.orig_offset, True)
diff --git a/tools/binman/etype/cbfs.py b/tools/binman/etype/cbfs.py
index 650ab2c292f..6cdbaa085f5 100644
--- a/tools/binman/etype/cbfs.py
+++ b/tools/binman/etype/cbfs.py
@@ -237,10 +237,10 @@  class Entry_cbfs(Entry):
             if entry._cbfs_compress:
                 entry.uncomp_size = cfile.memlen
 
-    def AddMissingProperties(self):
-        super().AddMissingProperties()
+    def AddMissingProperties(self, have_image_pos):
+        super().AddMissingProperties(have_image_pos)
         for entry in self._cbfs_entries.values():
-            entry.AddMissingProperties()
+            entry.AddMissingProperties(have_image_pos)
             if entry._cbfs_compress:
                 state.AddZeroProp(entry._node, 'uncomp-size')
                 # Store the 'compress' property, since we don't look at
diff --git a/tools/binman/etype/section.py b/tools/binman/etype/section.py
index 6e6f6749727..2812989ba1a 100644
--- a/tools/binman/etype/section.py
+++ b/tools/binman/etype/section.py
@@ -136,11 +136,13 @@  class Entry_section(Entry):
         for entry in self._entries.values():
             entry.ExpandEntries()
 
-    def AddMissingProperties(self):
+    def AddMissingProperties(self, have_image_pos):
         """Add new properties to the device tree as needed for this entry"""
-        super().AddMissingProperties()
+        super().AddMissingProperties(have_image_pos)
+        if self.compress != 'none':
+            have_image_pos = False
         for entry in self._entries.values():
-            entry.AddMissingProperties()
+            entry.AddMissingProperties(have_image_pos)
 
     def ObtainContents(self):
         return self.GetEntryContents()
@@ -323,8 +325,9 @@  class Entry_section(Entry):
 
     def SetImagePos(self, image_pos):
         super().SetImagePos(image_pos)
-        for entry in self._entries.values():
-            entry.SetImagePos(image_pos + self.offset)
+        if self.compress == 'none':
+            for entry in self._entries.values():
+                entry.SetImagePos(image_pos + self.offset)
 
     def ProcessContents(self):
         sizes_ok_base = super(Entry_section, self).ProcessContents()