diff mbox series

[2/2] package/pkg-utils: teach per-package-rsync to copy or hardlink dest

Message ID 3a0be46ecee62af708a5c63ebcd4845228fc29a5.1697576472.git.yann.morin.1998@free.fr
State Accepted
Headers show
Series package/pkg-utils: fix regression in size and build time with PPD (branch yem/rsync-copy) | expand

Commit Message

Yann E. MORIN Oct. 17, 2023, 9:01 p.m. UTC
commit 21d52e52d8de (package/pkg-utils.mk: break hardlinks in global
{TARGET, HOST}_DIR on per-package build) was recently reverted, so we
are back to a situation where it is possible for packages and post-build
scripts to modify files in-place, and thus impact files in any arbitrary
per-package directory, which may break things on rebuild for example.

21d52e52d8de wsa too big a hammer, but we can still apply the reasoning
from it, to the aggregation of the final target and host directories.

This solves the case for post-build scripts at least. We leave the case
of inter-package modification aside, as it is a bigger issue that will
need more than jsut copying files around.

Signed-off-by: Yann E. MORIN <yann.morin.1998@free.fr>
Cc: Herve Codina <herve.codina@bootlin.com>
Cc: Peter Korsgaard <peter@korsgaard.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
---
 Makefile             |  4 ++--
 package/pkg-utils.mk | 15 ++++++++++++---
 2 files changed, 14 insertions(+), 5 deletions(-)

Comments

Herve Codina Oct. 18, 2023, 8:53 a.m. UTC | #1
Hi Yann,

On Tue, 17 Oct 2023 23:01:20 +0200
"Yann E. MORIN" <yann.morin.1998@free.fr> wrote:

> commit 21d52e52d8de (package/pkg-utils.mk: break hardlinks in global
> {TARGET, HOST}_DIR on per-package build) was recently reverted, so we
> are back to a situation where it is possible for packages and post-build
> scripts to modify files in-place, and thus impact files in any arbitrary
> per-package directory, which may break things on rebuild for example.
> 
> 21d52e52d8de wsa too big a hammer, but we can still apply the reasoning

s/wsa/was/

> from it, to the aggregation of the final target and host directories.
> 
> This solves the case for post-build scripts at least. We leave the case
> of inter-package modification aside, as it is a bigger issue that will
> need more than jsut copying files around.

s/jsut/just/

> 
> Signed-off-by: Yann E. MORIN <yann.morin.1998@free.fr>
> Cc: Herve Codina <herve.codina@bootlin.com>
> Cc: Peter Korsgaard <peter@korsgaard.com>
> Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
> ---
>  Makefile             |  4 ++--
>  package/pkg-utils.mk | 15 ++++++++++++---
>  2 files changed, 14 insertions(+), 5 deletions(-)
> 

[...]

> --- a/package/pkg-utils.mk
> +++ b/package/pkg-utils.mk
> @@ -214,10 +214,19 @@ ifeq ($(BR2_PER_PACKAGE_DIRECTORIES),y)
>  # $1: space-separated list of packages to rsync from
>  # $2: 'host' or 'target'
>  # $3: destination directory
> +# $4: literal "copy" or "hardlink" to copy or hardlink files from src to dest
>  define per-package-rsync
>  	mkdir -p $(3)
>  	$(foreach pkg,$(1),\
> -		rsync -a --link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> +		rsync -a \
> +			--hard-links \

You preserve hard links (--hard-links) in all cases (copy and hardlink).
In case of copy, is it correct to preserve hard links ?

> +			$(if $(filter hardlink,$(4)), \
> +				--link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/, \
> +				$(if $(filter copy,$(4)), \
> +					$(empty), \
> +					$(error per-package-rsync can only "copy" or "hardlink", not "$(4)") \
> +				) \
> +			) \
>  			$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
>  			$(3)$(sep))
>  endef

[...]

Best regards,
Hervé
Yann E. MORIN Oct. 18, 2023, 3:20 p.m. UTC | #2
Hervé, All

On 2023-10-18 10:53 +0200, Herve Codina via buildroot spake thusly:
> On Tue, 17 Oct 2023 23:01:20 +0200
> "Yann E. MORIN" <yann.morin.1998@free.fr> wrote:
[--SNIP--]

Typoes fixed in both commits, thanks.

> > --- a/package/pkg-utils.mk
> > +++ b/package/pkg-utils.mk
> > @@ -214,10 +214,19 @@ ifeq ($(BR2_PER_PACKAGE_DIRECTORIES),y)
> >  # $1: space-separated list of packages to rsync from
> >  # $2: 'host' or 'target'
> >  # $3: destination directory
> > +# $4: literal "copy" or "hardlink" to copy or hardlink files from src to dest
> >  define per-package-rsync
> >  	mkdir -p $(3)
> >  	$(foreach pkg,$(1),\
> > -		rsync -a --link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> > +		rsync -a \
> > +			--hard-links \
> You preserve hard links (--hard-links) in all cases (copy and hardlink).
> In case of copy, is it correct to preserve hard links ?

Yes, --hard-links is:

    This tells rsync to look for hard-linked files in the source and
    link together the corresponding files on the destination.

Regards,
Yann E. MORIN.

> > +			$(if $(filter hardlink,$(4)), \
> > +				--link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/, \
> > +				$(if $(filter copy,$(4)), \
> > +					$(empty), \
> > +					$(error per-package-rsync can only "copy" or "hardlink", not "$(4)") \
> > +				) \
> > +			) \
> >  			$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> >  			$(3)$(sep))
> >  endef
> 
> [...]
> 
> Best regards,
> Hervé
> _______________________________________________
> buildroot mailing list
> buildroot@buildroot.org
> https://lists.buildroot.org/mailman/listinfo/buildroot
Herve Codina Oct. 18, 2023, 3:38 p.m. UTC | #3
Yann, All,

On Wed, 18 Oct 2023 17:20:47 +0200
"Yann E. MORIN" <yann.morin.1998@free.fr> wrote:

> Hervé, All
> 
[...]
> 
> > > --- a/package/pkg-utils.mk
> > > +++ b/package/pkg-utils.mk
> > > @@ -214,10 +214,19 @@ ifeq ($(BR2_PER_PACKAGE_DIRECTORIES),y)
> > >  # $1: space-separated list of packages to rsync from
> > >  # $2: 'host' or 'target'
> > >  # $3: destination directory
> > > +# $4: literal "copy" or "hardlink" to copy or hardlink files from src to dest
> > >  define per-package-rsync
> > >  	mkdir -p $(3)
> > >  	$(foreach pkg,$(1),\
> > > -		rsync -a --link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> > > +		rsync -a \
> > > +			--hard-links \  
> > You preserve hard links (--hard-links) in all cases (copy and hardlink).
> > In case of copy, is it correct to preserve hard links ?  
> 
> Yes, --hard-links is:
> 
>     This tells rsync to look for hard-linked files in the source and
>     link together the corresponding files on the destination.

In case of 'copy', you keep hard-links already present in source and so the
destination (final host/target dirs) is not a full copy.
Some hard-links can be present and post-build scripts can still corrupt some
per-package sources.

I don't understand why keeping hard-links on this last copy is needed and I
probably miss something...

Regards,
Hervé

> 
> Regards,
> Yann E. MORIN.
> 
> > > +			$(if $(filter hardlink,$(4)), \
> > > +				--link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/, \
> > > +				$(if $(filter copy,$(4)), \
> > > +					$(empty), \
> > > +					$(error per-package-rsync can only "copy" or "hardlink", not "$(4)") \
> > > +				) \
> > > +			) \
> > >  			$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> > >  			$(3)$(sep))
> > >  endef  
> > 
> > [...]
> >
Yann E. MORIN Oct. 18, 2023, 4:18 p.m. UTC | #4
Hervé, All,

On 2023-10-18 17:38 +0200, Herve Codina via buildroot spake thusly:
> On Wed, 18 Oct 2023 17:20:47 +0200
> "Yann E. MORIN" <yann.morin.1998@free.fr> wrote:
> > Hervé, All
> > > > --- a/package/pkg-utils.mk
> > > > +++ b/package/pkg-utils.mk
> > > > @@ -214,10 +214,19 @@ ifeq ($(BR2_PER_PACKAGE_DIRECTORIES),y)
> > > >  # $1: space-separated list of packages to rsync from
> > > >  # $2: 'host' or 'target'
> > > >  # $3: destination directory
> > > > +# $4: literal "copy" or "hardlink" to copy or hardlink files from src to dest
> > > >  define per-package-rsync
> > > >  	mkdir -p $(3)
> > > >  	$(foreach pkg,$(1),\
> > > > -		rsync -a --link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> > > > +		rsync -a \
> > > > +			--hard-links \  
> > > You preserve hard links (--hard-links) in all cases (copy and hardlink).
> > > In case of copy, is it correct to preserve hard links ?  
> > Yes, --hard-links is:
> > 
> >     This tells rsync to look for hard-linked files in the source and
> >     link together the corresponding files on the destination.
> In case of 'copy', you keep hard-links already present in source and so the
> destination (final host/target dirs) is not a full copy.

Ah, it does not hard-link something in dest into src. The hardlinks are
only in the destination. See below...

> Some hard-links can be present and post-build scripts can still corrupt some
> per-package sources.
> 
> I don't understand why keeping hard-links on this last copy is needed and I
> probably miss something...

That is because we can have hard-links in host or target. For example,
git will install a lot of hard-links in /usr/libexec/git-core/
(excerpt):

    $ ls -li per-package/git/target/usr/libexec/git-core/
    9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-var
    9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-verify-commit
    9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-verify-pack
    9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-verify-tag
    9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-version

    $ ls -li target/usr/libexec/git-core/
    9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-var
    9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-verify-commit
    9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-verify-pack
    9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-verify-tag
    9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-version

As you can see, in git's PPD, git tools are hard-links, and in target/
they also are hard-links. But the inode is different, so the hard-links
in target/ are different from the hard-links in the PPD.

However, in all the PPDs of packages that have git as a dependency, the
hard-links would all be to the same inode, 9588770.

(The size delta is because the files in target/ are stripped.)

So, in case of "copy", this is actually a copy, that keeps existing
hard-links in the source, as new hard-links in the destination. Without
--hard-links, the files in target/usr/libexec/git-core/ would all have a
different inode, as they would be different files, and that would
tremendously increase the filesystem size (squashfs would notice, and
would store the content only once, but would still create as many inodes
as needed, though).

Regards,
Yann E. MORIN.

> Regards,
> Hervé
> 
> > 
> > Regards,
> > Yann E. MORIN.
> > 
> > > > +			$(if $(filter hardlink,$(4)), \
> > > > +				--link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/, \
> > > > +				$(if $(filter copy,$(4)), \
> > > > +					$(empty), \
> > > > +					$(error per-package-rsync can only "copy" or "hardlink", not "$(4)") \
> > > > +				) \
> > > > +			) \
> > > >  			$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> > > >  			$(3)$(sep))
> > > >  endef  
> > > 
> > > [...]
> > > 
> _______________________________________________
> buildroot mailing list
> buildroot@buildroot.org
> https://lists.buildroot.org/mailman/listinfo/buildroot
Herve Codina Oct. 18, 2023, 4:42 p.m. UTC | #5
Yann, All,

On Wed, 18 Oct 2023 18:18:59 +0200
"Yann E. MORIN" <yann.morin.1998@free.fr> wrote:

> Hervé, All,
> 
> On 2023-10-18 17:38 +0200, Herve Codina via buildroot spake thusly:
> > On Wed, 18 Oct 2023 17:20:47 +0200
> > "Yann E. MORIN" <yann.morin.1998@free.fr> wrote:  
> > > Hervé, All  
> > > > > --- a/package/pkg-utils.mk
> > > > > +++ b/package/pkg-utils.mk
> > > > > @@ -214,10 +214,19 @@ ifeq ($(BR2_PER_PACKAGE_DIRECTORIES),y)
> > > > >  # $1: space-separated list of packages to rsync from
> > > > >  # $2: 'host' or 'target'
> > > > >  # $3: destination directory
> > > > > +# $4: literal "copy" or "hardlink" to copy or hardlink files from src to dest
> > > > >  define per-package-rsync
> > > > >  	mkdir -p $(3)
> > > > >  	$(foreach pkg,$(1),\
> > > > > -		rsync -a --link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
> > > > > +		rsync -a \
> > > > > +			--hard-links \    
> > > > You preserve hard links (--hard-links) in all cases (copy and hardlink).
> > > > In case of copy, is it correct to preserve hard links ?    
> > > Yes, --hard-links is:
> > > 
> > >     This tells rsync to look for hard-linked files in the source and
> > >     link together the corresponding files on the destination.  
> > In case of 'copy', you keep hard-links already present in source and so the
> > destination (final host/target dirs) is not a full copy.  
> 
> Ah, it does not hard-link something in dest into src. The hardlinks are
> only in the destination. See below...
> 
> > Some hard-links can be present and post-build scripts can still corrupt some
> > per-package sources.
> > 
> > I don't understand why keeping hard-links on this last copy is needed and I
> > probably miss something...  
> 
> That is because we can have hard-links in host or target. For example,
> git will install a lot of hard-links in /usr/libexec/git-core/
> (excerpt):
> 
>     $ ls -li per-package/git/target/usr/libexec/git-core/
>     9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-var
>     9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-verify-commit
>     9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-verify-pack
>     9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-verify-tag
>     9588770 -rwxr-xr-x 141 ymorin ymorin 3318828 Oct 16 17:45 git-version
> 
>     $ ls -li target/usr/libexec/git-core/
>     9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-var
>     9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-verify-commit
>     9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-verify-pack
>     9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-verify-tag
>     9600190 -rwxr-xr-x 141 ymorin ymorin 2832860 Oct 16 17:45 git-version
> 
> As you can see, in git's PPD, git tools are hard-links, and in target/
> they also are hard-links. But the inode is different, so the hard-links
> in target/ are different from the hard-links in the PPD.
> 
> However, in all the PPDs of packages that have git as a dependency, the
> hard-links would all be to the same inode, 9588770.
> 
> (The size delta is because the files in target/ are stripped.)
> 
> So, in case of "copy", this is actually a copy, that keeps existing
> hard-links in the source, as new hard-links in the destination. Without
> --hard-links, the files in target/usr/libexec/git-core/ would all have a
> different inode, as they would be different files, and that would
> tremendously increase the filesystem size (squashfs would notice, and
> would store the content only once, but would still create as many inodes
> as needed, though).
> 

Thanks a lot for these explanations!
It is clearer in my mind now.

Maybe, in the commit log:
--- 8< ---
rsync --hard-links option allows to keep existing hard-links in the source
as new hard-links in the destination ("copy" of hard-links).
Having this hard-links "copy" contributes to the directories size
reduction.
--- 8< --- 

With all that said:
Reviewed-by: Herve Codina <herve.codina@bootlin.com>

Best regards,
Hervé
Peter Korsgaard Oct. 26, 2023, 6:34 p.m. UTC | #6
>>>>> "Yann" == Yann E MORIN <yann.morin.1998@free.fr> writes:

 > commit 21d52e52d8de (package/pkg-utils.mk: break hardlinks in global
 > {TARGET, HOST}_DIR on per-package build) was recently reverted, so we
 > are back to a situation where it is possible for packages and post-build
 > scripts to modify files in-place, and thus impact files in any arbitrary
 > per-package directory, which may break things on rebuild for example.

 > 21d52e52d8de wsa too big a hammer, but we can still apply the reasoning
 > from it, to the aggregation of the final target and host directories.

 > This solves the case for post-build scripts at least. We leave the case
 > of inter-package modification aside, as it is a bigger issue that will
 > need more than jsut copying files around.

 > Signed-off-by: Yann E. MORIN <yann.morin.1998@free.fr>
 > Cc: Herve Codina <herve.codina@bootlin.com>
 > Cc: Peter Korsgaard <peter@korsgaard.com>
 > Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>

Committed to 2023.02.x and 2023.08.x, thanks.
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index f87f4458ba..3e85d5ef09 100644
--- a/Makefile
+++ b/Makefile
@@ -717,7 +717,7 @@  STAGING_DIR_FILES_LISTS = $(sort $(wildcard $(BUILD_DIR)/*/.files-list-staging.t
 .PHONY: host-finalize
 host-finalize: $(PACKAGES) $(HOST_DIR) $(HOST_DIR_SYMLINK)
 	@$(call MESSAGE,"Finalizing host directory")
-	$(call per-package-rsync,$(sort $(PACKAGES)),host,$(HOST_DIR))
+	$(call per-package-rsync,$(sort $(PACKAGES)),host,$(HOST_DIR),copy)
 
 .PHONY: staging-finalize
 staging-finalize: $(STAGING_DIR_SYMLINK)
@@ -725,7 +725,7 @@  staging-finalize: $(STAGING_DIR_SYMLINK)
 .PHONY: target-finalize
 target-finalize: $(PACKAGES) $(TARGET_DIR) host-finalize
 	@$(call MESSAGE,"Finalizing target directory")
-	$(call per-package-rsync,$(sort $(PACKAGES)),target,$(TARGET_DIR))
+	$(call per-package-rsync,$(sort $(PACKAGES)),target,$(TARGET_DIR),copy)
 	$(foreach hook,$(TARGET_FINALIZE_HOOKS),$($(hook))$(sep))
 	rm -rf $(TARGET_DIR)/usr/include $(TARGET_DIR)/usr/share/aclocal \
 		$(TARGET_DIR)/usr/lib/pkgconfig $(TARGET_DIR)/usr/share/pkgconfig \
diff --git a/package/pkg-utils.mk b/package/pkg-utils.mk
index 5d5980b098..dcab7d9b2a 100644
--- a/package/pkg-utils.mk
+++ b/package/pkg-utils.mk
@@ -214,10 +214,19 @@  ifeq ($(BR2_PER_PACKAGE_DIRECTORIES),y)
 # $1: space-separated list of packages to rsync from
 # $2: 'host' or 'target'
 # $3: destination directory
+# $4: literal "copy" or "hardlink" to copy or hardlink files from src to dest
 define per-package-rsync
 	mkdir -p $(3)
 	$(foreach pkg,$(1),\
-		rsync -a --link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
+		rsync -a \
+			--hard-links \
+			$(if $(filter hardlink,$(4)), \
+				--link-dest=$(PER_PACKAGE_DIR)/$(pkg)/$(2)/, \
+				$(if $(filter copy,$(4)), \
+					$(empty), \
+					$(error per-package-rsync can only "copy" or "hardlink", not "$(4)") \
+				) \
+			) \
 			$(PER_PACKAGE_DIR)/$(pkg)/$(2)/ \
 			$(3)$(sep))
 endef
@@ -230,8 +239,8 @@  endef
 #
 # $1: space-separated list of packages to rsync from
 define prepare-per-package-directory
-	$(call per-package-rsync,$(1),host,$(HOST_DIR))
-	$(call per-package-rsync,$(1),target,$(TARGET_DIR))
+	$(call per-package-rsync,$(1),host,$(HOST_DIR),hardlink)
+	$(call per-package-rsync,$(1),target,$(TARGET_DIR),hardlink)
 endef
 
 # Ensure files like .la, .pc, .pri, .cmake, and so on, point to the