diff mbox series

package/pkg-download: do not try to vendor _EXTRA_DOWNLOADS

Message ID 24588_1651771186_62740732_24588_360_1_2d4cd3ac6995d7b5119ef4dfdc22b975992b8a73.1651770627.git.yann.morin@orange.com
State Accepted
Headers show
Series package/pkg-download: do not try to vendor _EXTRA_DOWNLOADS | expand

Commit Message

Yann E. MORIN May 5, 2022, 5:19 p.m. UTC
From: "Yann E. MORIN" <yann.morin@orange.com>

For golang- or cargo-based packages, we apply a vendoring pass after the
package's "main" download is done. Whether to vendor or not is based on
the heuristic that a specific directory exists or not; for golang
packages, we look for '/vendor', while for cargo, we look for '/VENDOR'.

This is fine for the "main" (for lack of a better term) download, but
this falls flat on its face for extra downloads. Indeed, some packages may
need to download data sets, or assets, as _EXTRA_DOWNLOADS. Those are
usually just data blobs, and are not actual golang or cargo packages; as
such they do not need to be vendored, but worse, if we try to actually
vendor them, this fails because the required files for vendoring are
missing from the archives in such data sets.

We fix that by decoupling the download for the extra download, from the
download for the main archive. We pass the post-processing option only
to the main download.

This makes the hard assumption that extra downloads will never need to
be post-processed for vendoring, of course; we hope this will always be
correct in practice.

Signed-off-by: Yann E. MORIN <yann.morin@orange.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
---
 package/pkg-download.mk | 3 ++-
 package/pkg-generic.mk  | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

Comments

Arnout Vandecappelle May 5, 2022, 8:37 p.m. UTC | #1
On 05/05/2022 19:19, yann.morin@orange.com wrote:
> From: "Yann E. MORIN" <yann.morin@orange.com>
> 
> For golang- or cargo-based packages, we apply a vendoring pass after the
> package's "main" download is done. Whether to vendor or not is based on
> the heuristic that a specific directory exists or not; for golang
> packages, we look for '/vendor', while for cargo, we look for '/VENDOR'.
> 
> This is fine for the "main" (by lack of a better term) download, but
> this falls flat on its face for extra downloads. Indeed, so packages may
> need to download data sets, or assets, as _EXTRA_DOWNLOADS. Those are
> usually just data blobs, and are not actual golang or cargo packages; as
> such they do not need to be vendored, but worse, if we try to actually
> vendor them, this fails because the required files for vendoring are
> missing from the archives in such data sets.
> 
> We fix that by decoupling the download for the extra download, from the
> download for the main archive. We pass the post-processing option only
> to the main download.
> 
> This makes the hard assumption that extra downloads will never need to
> be post-processed for vendoring, of course; we hope this will always be
> correct in practice.
> 
> Signed-off-by: Yann E. MORIN <yann.morin@orange.com>
> Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>


  Applied to master, thanks, with a few changes:

      - no loop needed for MAIN_DOWNLOAD, it can have only one;
      - remove superfluous backslash in the definition of MAIN_DOWNLOAD;
      - introduce _ADDITIONAL_DOWNLOADS to avoid filter-out.


  Regards,
  Arnout

> ---
>   package/pkg-download.mk | 3 ++-
>   package/pkg-generic.mk  | 8 ++++++--
>   2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/package/pkg-download.mk b/package/pkg-download.mk
> index a15e21e110..28b31ca30e 100644
> --- a/package/pkg-download.mk
> +++ b/package/pkg-download.mk
> @@ -103,6 +103,7 @@ endif
>   #
>   # Argument 1 is the source location
>   # Argument 2 is the upper-case package name
> +# Argument 3 is a sapce-separated list of optional arguments
>   #
>   ################################################################################
>   
> @@ -118,10 +119,10 @@ define DOWNLOAD
>   		-n '$($(2)_BASENAME_RAW)' \
>   		-N '$($(2)_RAWNAME)' \
>   		-o '$($(2)_DL_DIR)/$(notdir $(1))' \
> -		$(if $($(2)_DOWNLOAD_POST_PROCESS),-p '$($(2)_DOWNLOAD_POST_PROCESS)') \
>   		$(if $($(2)_GIT_SUBMODULES),-r) \
>   		$(if $($(2)_GIT_LFS),-l) \
>   		$(foreach uri,$(call DOWNLOAD_URIS,$(1),$(2)),-u $(uri)) \
> +		$(3) \
>   		$(QUIET) \
>   		-- \
>   		$($(2)_DL_OPTS)
> diff --git a/package/pkg-generic.mk b/package/pkg-generic.mk
> index 1222526ba1..3a3af44fc4 100644
> --- a/package/pkg-generic.mk
> +++ b/package/pkg-generic.mk
> @@ -192,7 +192,8 @@ $(BUILD_DIR)/%/.stamp_downloaded:
>   			break ; \
>   		fi ; \
>   	done
> -	$(foreach p,$($(PKG)_ALL_DOWNLOADS),$(call DOWNLOAD,$(p),$(PKG))$(sep))
> +	$(foreach p,$($(PKG)_MAIN_DOWNLOAD),$(call DOWNLOAD,$(p),$(PKG),$(if $($(PKG)_DOWNLOAD_POST_PROCESS),-p '$($(PKG)_DOWNLOAD_POST_PROCESS)'))$(sep))
> +	$(foreach p,$(filter-out $($(PKG)_MAIN_DOWNLOAD),$($(PKG)_ALL_DOWNLOADS)),$(call DOWNLOAD,$(p),$(PKG))$(sep))
>   	$(foreach hook,$($(PKG)_POST_DOWNLOAD_HOOKS),$(call $(hook))$(sep))
>   	$(Q)mkdir -p $(@D)
>   	@$(call step_end,download)
> @@ -601,8 +602,11 @@ ifndef $(2)_PATCH
>    endif
>   endif
>   
> -$(2)_ALL_DOWNLOADS = \
> +$(2)_MAIN_DOWNLOAD = \
>   	$$(if $$($(2)_SOURCE),$$($(2)_SITE_METHOD)+$$($(2)_SITE)/$$($(2)_SOURCE)) \
> +
> +$(2)_ALL_DOWNLOADS = \
> +	$$($(2)_MAIN_DOWNLOAD) \
>   	$$(foreach p,$$($(2)_PATCH) $$($(2)_EXTRA_DOWNLOADS),\
>   		$$(if $$(findstring ://,$$(p)),$$(p),\
>   			$$($(2)_SITE_METHOD)+$$($(2)_SITE)/$$(p)))
Yann E. MORIN May 6, 2022, 5:21 a.m. UTC | #2
Arnout, All,

On 2022-05-05 22:37 +0200, Arnout Vandecappelle spake thusly:
> On 05/05/2022 19:19, yann.morin@orange.com wrote:
> >From: "Yann E. MORIN" <yann.morin@orange.com>
> >
> >For golang- or cargo-based packages, we apply a vendoring pass after the
> >package's "main" download is done. Whether to vendor or not is based on
> >the heuristic that a specific directory exists or not; for golang
> >packages, we look for '/vendor', while for cargo, we look for '/VENDOR'.
> >
> >This is fine for the "main" (by lack of a better term) download, but
> >this falls flat on its face for extra downloads. Indeed, so packages may
> >need to download data sets, or assets, as _EXTRA_DOWNLOADS. Those are
> >usually just data blobs, and are not actual golang or cargo packages; as
> >such they do not need to be vendored, but worse, if we try to actually
> >vendor them, this fails because the required files for vendoring are
> >missing from the archives in such data sets.
> >
> >We fix that by decoupling the download for the extra download, from the
> >download for the main archive. We pass the post-processing option only
> >to the main download.
> >
> >This makes the hard assumption that extra downloads will never need to
> >be post-processed for vendoring, of course; we hope this will always be
> >correct in practice.
> >
> >Signed-off-by: Yann E. MORIN <yann.morin@orange.com>
> >Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
> 
>  Applied to master, thanks, with a few changes:
> 
>      - no loop needed for MAIN_DOWNLOAD, it can have only one;

In fact, that was on purpose: it can have no main download...
Indeed, a package can very well only declare extra downloads...
I forgot to explain that in the commit log, sorry.

So, either we do the loop, which is a simple way to expand to nothing
when there is no item but the reason is not obvious, or we add an
explicit $(if ...) test, which is going to be a bit more verbose...

What's your preference?

>      - remove superfluous backslash in the definition of MAIN_DOWNLOAD;

Ah, yes I forgot to drop it... Good catch.

>      - introduce _ADDITIONAL_DOWNLOADS to avoid filter-out.

I did that initially, but I thought that yet another variable could be
easily avoided with the filtering out. But OK, that's fine with me too.

Thanks!

Regards,
Yann E. MORIN.
Arnout Vandecappelle May 10, 2022, 7:24 p.m. UTC | #3
On 06/05/2022 07:21, yann.morin@orange.com wrote:
> Arnout, All,
> 
> On 2022-05-05 22:37 +0200, Arnout Vandecappelle spake thusly:
>> On 05/05/2022 19:19, yann.morin@orange.com wrote:
>>> From: "Yann E. MORIN" <yann.morin@orange.com>
>>>
>>> For golang- or cargo-based packages, we apply a vendoring pass after the
>>> package's "main" download is done. Whether to vendor or not is based on
>>> the heuristic that a specific directory exists or not; for golang
>>> packages, we look for '/vendor', while for cargo, we look for '/VENDOR'.
>>>
>>> This is fine for the "main" (by lack of a better term) download, but
>>> this falls flat on its face for extra downloads. Indeed, so packages may
>>> need to download data sets, or assets, as _EXTRA_DOWNLOADS. Those are
>>> usually just data blobs, and are not actual golang or cargo packages; as
>>> such they do not need to be vendored, but worse, if we try to actually
>>> vendor them, this fails because the required files for vendoring are
>>> missing from the archives in such data sets.
>>>
>>> We fix that by decoupling the download for the extra download, from the
>>> download for the main archive. We pass the post-processing option only
>>> to the main download.
>>>
>>> This makes the hard assumption that extra downloads will never need to
>>> be post-processed for vendoring, of course; we hope this will always be
>>> correct in practice.
>>>
>>> Signed-off-by: Yann E. MORIN <yann.morin@orange.com>
>>> Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
>>
>>   Applied to master, thanks, with a few changes:
>>
>>       - no loop needed for MAIN_DOWNLOAD, it can have only one;
> 
> In fact, that was on purpose: it can have no main download...
> Indeed, a package can very well only declare extra downloads...
> I forgot to explain that in the commit log, sorry.

  OK. However, somehow this seems to work:

package/foo/foo.mk:
FOO_SOURCE =
FOO_VERSION = 1

FOO_INSTALL_TARGET_CMDS = echo '***foo!***'

$(eval $(generic-package))

bash 84467$ make foo
WARNING: no hash file for
 >>> foo 1 Extracting
 >>> foo 1 Patching
 >>> foo 1 Configuring
 >>> foo 1 Building
 >>> foo 1 Installing to target
echo '***foo!***'
***foo!***


  So I don't think it is needed after all? The dl-wrapper script apparently 
doesn't do anything if the filename is empty.

  Regards,
  Arnout

> 
> So, either we do the loop, which is a simple way to expand to nothing
> when there is no item but the reason is not obvious, or we add an
> explicit $(if ...) test, which is going to be a bit more verbose...
> 
> What's your preference?
> 
>>       - remove superfluous backslash in the definition of MAIN_DOWNLOAD;
> 
> Ah, yes I forgot to drop it... Good catch.
> 
>>       - introduce _ADDITIONAL_DOWNLOADS to avoid filter-out.
> 
> I did that initially, but I thought that yet another variable could be
> easily avoided with the filtering our. But OK, that's fine with me too.
> 
> Thanks!
> 
> Regards,
> Yann E. MORIN.
>
Yann E. MORIN May 11, 2022, 6 a.m. UTC | #4
Arnout, All,

On 2022-05-10 21:24 +0200, Arnout Vandecappelle spake thusly:
> On 06/05/2022 07:21, yann.morin@orange.com wrote:
> >On 2022-05-05 22:37 +0200, Arnout Vandecappelle spake thusly:
[--SNIP--]
> >>  Applied to master, thanks, with a few changes:
> >>      - no loop needed for MAIN_DOWNLOAD, it can have only one;
> >In fact, that was on purpose: it can have no main download...
> >Indeed, a package can very well only declare extra downloads...
> >I forgot to explain that in the commit log, sorry.
>  OK. However, somehow this seems to work:

Yes, otherwise we could not even install host-skeleton to begin with....

>  So I don't think it is needed after all?

Still, I am not happy that we are in a situation that works by accident.
I'd much prefer we understand why exactly this does not break...

> The dl-wrapper script apparently
> doesn't do anything if the filename is empty.

It really works by accident:

 1. we pass no URI, so I thought the dl-wrapper would fail, as it
    basically does:

        download_and_check=0
        for uri in "${uris[@]}"; do
            ...
        done
        if [ "${download_and_check}" -eq 0 ]; then
            exit 1
        fi

    However, it does not even go that far...

 2. even though there is no output file, we still pass the path to
    the package output directory as the output path. So, as part of
    validating the download, the wrapper checks if the output file
    exists, and checks its hash:

        if [ -e "${output}" ]; then
            if support/download/check-hash ${quiet} "${hfile}" "${output}" ...
                exit 0
            ...
        fi

 3. the output path does exist now, because we explicitly create it just
    before calling the wrapper, because that's where we also locate the
    lockfile.

So, this is all a bit fragile, and I am not feeling very happy about
that...

So, I'll cook up a fixup patch to revert to the previous behaviour of
not even attempting a download in that case.

Regards,
Yann E. MORIN.
Peter Korsgaard May 28, 2022, 9 a.m. UTC | #5
>>>>>   <yann.morin@orange.com> writes:

 > From: "Yann E. MORIN" <yann.morin@orange.com>
 > For golang- or cargo-based packages, we apply a vendoring pass after the
 > package's "main" download is done. Whether to vendor or not is based on
 > the heuristic that a specific directory exists or not; for golang
 > packages, we look for '/vendor', while for cargo, we look for '/VENDOR'.

 > This is fine for the "main" (by lack of a better term) download, but
 > this falls flat on its face for extra downloads. Indeed, so packages may
 > need to download data sets, or assets, as _EXTRA_DOWNLOADS. Those are
 > usually just data blobs, and are not actual golang or cargo packages; as
 > such they do not need to be vendored, but worse, if we try to actually
 > vendor them, this fails because the required files for vendoring are
 > missing from the archives in such data sets.

 > We fix that by decoupling the download for the extra download, from the
 > download for the main archive. We pass the post-processing option only
 > to the main download.

 > This makes the hard assumption that extra downloads will never need to
 > be post-processed for vendoring, of course; we hope this will always be
 > correct in practice.

 > Signed-off-by: Yann E. MORIN <yann.morin@orange.com>
 > Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>

Committed to 2022.02.x, thanks.
diff mbox series

Patch

diff --git a/package/pkg-download.mk b/package/pkg-download.mk
index a15e21e110..28b31ca30e 100644
--- a/package/pkg-download.mk
+++ b/package/pkg-download.mk
@@ -103,6 +103,7 @@  endif
 #
 # Argument 1 is the source location
 # Argument 2 is the upper-case package name
+# Argument 3 is a space-separated list of optional arguments
 #
 ################################################################################
 
@@ -118,10 +119,10 @@  define DOWNLOAD
 		-n '$($(2)_BASENAME_RAW)' \
 		-N '$($(2)_RAWNAME)' \
 		-o '$($(2)_DL_DIR)/$(notdir $(1))' \
-		$(if $($(2)_DOWNLOAD_POST_PROCESS),-p '$($(2)_DOWNLOAD_POST_PROCESS)') \
 		$(if $($(2)_GIT_SUBMODULES),-r) \
 		$(if $($(2)_GIT_LFS),-l) \
 		$(foreach uri,$(call DOWNLOAD_URIS,$(1),$(2)),-u $(uri)) \
+		$(3) \
 		$(QUIET) \
 		-- \
 		$($(2)_DL_OPTS)
diff --git a/package/pkg-generic.mk b/package/pkg-generic.mk
index 1222526ba1..3a3af44fc4 100644
--- a/package/pkg-generic.mk
+++ b/package/pkg-generic.mk
@@ -192,7 +192,8 @@  $(BUILD_DIR)/%/.stamp_downloaded:
 			break ; \
 		fi ; \
 	done
-	$(foreach p,$($(PKG)_ALL_DOWNLOADS),$(call DOWNLOAD,$(p),$(PKG))$(sep))
+	$(foreach p,$($(PKG)_MAIN_DOWNLOAD),$(call DOWNLOAD,$(p),$(PKG),$(if $($(PKG)_DOWNLOAD_POST_PROCESS),-p '$($(PKG)_DOWNLOAD_POST_PROCESS)'))$(sep))
+	$(foreach p,$(filter-out $($(PKG)_MAIN_DOWNLOAD),$($(PKG)_ALL_DOWNLOADS)),$(call DOWNLOAD,$(p),$(PKG))$(sep))
 	$(foreach hook,$($(PKG)_POST_DOWNLOAD_HOOKS),$(call $(hook))$(sep))
 	$(Q)mkdir -p $(@D)
 	@$(call step_end,download)
@@ -601,8 +602,11 @@  ifndef $(2)_PATCH
  endif
 endif
 
-$(2)_ALL_DOWNLOADS = \
+$(2)_MAIN_DOWNLOAD = \
 	$$(if $$($(2)_SOURCE),$$($(2)_SITE_METHOD)+$$($(2)_SITE)/$$($(2)_SOURCE)) \
+
+$(2)_ALL_DOWNLOADS = \
+	$$($(2)_MAIN_DOWNLOAD) \
 	$$(foreach p,$$($(2)_PATCH) $$($(2)_EXTRA_DOWNLOADS),\
 		$$(if $$(findstring ://,$$(p)),$$(p),\
 			$$($(2)_SITE_METHOD)+$$($(2)_SITE)/$$(p)))