diff mbox series

[v4] support/scripts/fix-rpath: parallelize patching files

Message ID 20221020115512.483686-1-dumasv.dev@gmail.com
State Accepted
Headers show
Series [v4] support/scripts/fix-rpath: parallelize patching files | expand

Commit Message

Victor Dumas Oct. 20, 2022, 11:55 a.m. UTC
Using "xargs" instead of "while read" loop allows for the patching of files to be parallelized
This significantly reduces the amount of time it takes to fix all the paths.
On a larger RFS(~300MB) this script was taking 5 minutes, it now only takes about 30s on a 12 core machine

Signed-off-by: Victor Dumas <dumasv.dev@gmail.com>
---
 support/scripts/fix-rpath | 66 ++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 28 deletions(-)

Comments

Quentin Schulz Nov. 14, 2022, 4:33 p.m. UTC | #1
Hi Victor,

On 10/20/22 13:55, Victor Dumas wrote:
> Using "xargs" instead of "while read" loop allows for the patching of files to be parallelized
> This significantly reduces the amount of time it takes to fix all the paths.
> On a larger RFS(~300MB) this script was taking 5 minutes, it now only takes about 30s on a 12 core machine
> 
> Signed-off-by: Victor Dumas <dumasv.dev@gmail.com>
> ---
>   support/scripts/fix-rpath | 66 ++++++++++++++++++++++-----------------
>   1 file changed, 38 insertions(+), 28 deletions(-)
> 
> diff --git a/support/scripts/fix-rpath b/support/scripts/fix-rpath
> index 3e67e770e5..cef7a48e76 100755
> --- a/support/scripts/fix-rpath
> +++ b/support/scripts/fix-rpath
> @@ -58,6 +58,38 @@ HOST_EXCLUDEPATHS="/share/terminfo"
>   STAGING_EXCLUDEPATHS="/usr/include /usr/share/terminfo"
>   TARGET_EXCLUDEPATHS="/lib/firmware"
>   
> +patch_file() {
> +    PATCHELF="${1}"
> +    rootdir="${2}"
> +    sanitize_extra_args="${3}"
> +    file="${4}"
> +
> +    # check if it's an ELF file
> +    rpath=$(${PATCHELF} --print-rpath "${file}" 2>&1)
> +    if test $? -ne 0 ; then
> +        return 0
> +    fi
> +
> +    # make files writable if necessary
> +    changed=$(chmod -c u+w "${file}")
> +
> +    # With per-package directory support, most RPATH of host
> +    # binaries will point to per-package directories. This won't
> +    # work with the --make-rpath-relative ${rootdir} invocation as
> +    # the per-package host directory is not within ${rootdir}. So,
> +    # we rewrite all RPATHs pointing to per-package directories so
> +    # that they point to the global host directry.
> +    changed_rpath=$(echo ${rpath} | sed "s@${PER_PACKAGE_DIR}/[^/]\+/host@${HOST_DIR}@")
> +    if test "${rpath}" != "${changed_rpath}" ; then
> +        ${PATCHELF} --set-rpath ${changed_rpath} "${file}"
> +    fi
> +
> +    # call patchelf to sanitize the rpath
> +    ${PATCHELF} --make-rpath-relative "${rootdir}" ${sanitize_extra_args[@]} "${file}"
> +    # restore the original permission
> +    test "${changed}" != "" && chmod u-w "${file}"
> +}
> +
>   main() {
>       local rootdir
>       local tree="${1}"
> @@ -123,34 +155,12 @@ main() {
>               ;;
>       esac
>   
> -    find_args+=( "-type" "f" "-print" )
> -
> -    while read file ; do
> -        # check if it's an ELF file
> -        rpath=$(${PATCHELF} --print-rpath "${file}" 2>&1)
> -        if test $? -ne 0 ; then
> -            continue
> -        fi
> -
> -        # make files writable if necessary
> -        changed=$(chmod -c u+w "${file}")
> -
> -        # With per-package directory support, most RPATH of host
> -        # binaries will point to per-package directories. This won't
> -        # work with the --make-rpath-relative ${rootdir} invocation as
> -        # the per-package host directory is not within ${rootdir}. So,
> -        # we rewrite all RPATHs pointing to per-package directories so
> -        # that they point to the global host directry.
> -        changed_rpath=$(echo ${rpath} | sed "s@${PER_PACKAGE_DIR}/[^/]\+/host@${HOST_DIR}@")
> -        if test "${rpath}" != "${changed_rpath}" ; then
> -            ${PATCHELF} --set-rpath ${changed_rpath} "${file}"
> -        fi
> -
> -        # call patchelf to sanitize the rpath
> -        ${PATCHELF} --make-rpath-relative "${rootdir}" ${sanitize_extra_args[@]} "${file}"
> -        # restore the original permission
> -        test "${changed}" != "" && chmod u-w "${file}"
> -    done < <(find "${rootdir}" ${find_args[@]})
> +    find_args+=( "-type" "f" "-print0" )
> +
> +    export -f patch_file
> +    # Limit the number of cores used
> +    procs=$(echo -e "$(($(nproc)-2)) \n 1" | sort -n | tail -n1)
> +    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${procs} -I {} bash -c "patch_file '${PATCHELF}' '${rootdir}' '${sanitize_extra_args}' $@" _ {}
>   

I think we should use PARALLEL_JOBS here to comply with the 
user-requested BR2_JLEVEL (the variable is assigned at the top of 
package/Makefile.in). It is used for all packages and also rootfs 
compression algorithms, so it would make sense to me to have this 
fix-rpath script respect it too.

I assume in order to pass this makefile option to the script, we'll need 
to add PARALLEL_JOBS=$(PARALLEL_JOBS) in front of each fix-rpath script? 
Similar to what's been done for PER_PACKAGE_DIR in Makefile.

I can suggest the following (to apply on top of the current patch and 
squash if that works for you):
```
diff --git a/Makefile b/Makefile
index 7c1c07a2e4..c171aa94de 100644
--- a/Makefile
+++ b/Makefile
@@ -593,8 +593,8 @@ world: target-post-image
  .PHONY: prepare-sdk
  prepare-sdk: world
  	@$(call MESSAGE,"Rendering the SDK relocatable")
-	PER_PACKAGE_DIR=$(PER_PACKAGE_DIR) $(TOPDIR)/support/scripts/fix-rpath 
host
-	PER_PACKAGE_DIR=$(PER_PACKAGE_DIR) $(TOPDIR)/support/scripts/fix-rpath 
staging
+	PARALLEL_JOBS=$(PARALLEL_JOBS) PER_PACKAGE_DIR=$(PER_PACKAGE_DIR) 
$(TOPDIR)/support/scripts/fix-rpath host
+	PARALLEL_JOBS=$(PARALLEL_JOBS) PER_PACKAGE_DIR=$(PER_PACKAGE_DIR) 
$(TOPDIR)/support/scripts/fix-rpath staging
  	$(INSTALL) -m 755 $(TOPDIR)/support/misc/relocate-sdk.sh 
$(HOST_DIR)/relocate-sdk.sh
  	mkdir -p $(HOST_DIR)/share/buildroot
  	echo $(HOST_DIR) > $(HOST_DIR)/share/buildroot/sdk-location
@@ -764,7 +764,7 @@ endif
  	ln -sf ../usr/lib/os-release $(TARGET_DIR)/etc

  	@$(call MESSAGE,"Sanitizing RPATH in target tree")
-	PER_PACKAGE_DIR=$(PER_PACKAGE_DIR) $(TOPDIR)/support/scripts/fix-rpath 
target
+	PARALLEL_JOBS=$(PARALLEL_JOBS) PER_PACKAGE_DIR=$(PER_PACKAGE_DIR) 
$(TOPDIR)/support/scripts/fix-rpath target

  # For a merged /usr, ensure that /lib, /bin and /sbin and their /usr
  # counterparts are appropriately setup as symlinks ones to the others.
diff --git a/support/scripts/fix-rpath b/support/scripts/fix-rpath
index cef7a48e76..6160efd461 100755
--- a/support/scripts/fix-rpath
+++ b/support/scripts/fix-rpath
@@ -158,9 +158,7 @@ main() {
      find_args+=( "-type" "f" "-print0" )

      export -f patch_file
-    # Limit the number of cores used
-    procs=$(echo -e "$(($(nproc)-2)) \n 1" | sort -n | tail -n1)
-    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${procs} -I {} 
bash -c "patch_file '${PATCHELF}' '${rootdir}' '${sanitize_extra_args}' 
$@" _ {}
+    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${PARALLEL_JOBS} 
-I {} bash -c "patch_file '${PATCHELF}' '${rootdir}' 
'${sanitize_extra_args}' $@" _ {}

      # Restore patched patchelf utility
      test "${tree}" = "host" && mv "${PATCHELF}.__to_be_patched" 
"${PATCHELF}"

```

I tested on a slow-ish laptop (Intel(R) Core(TM) i7-10510U CPU @ 1.80GHz 
4c/8t) with:
make raspberrypi4_defconfig
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
time make target-finalize
# apply patch + diff above
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
time make target-finalize

Before:
make target-finalize  8.23s user 3.52s system 91% cpu 12.851 total
After:
make target-finalize  9.25s user 4.81s system 158% cpu 8.892 total

Cheers,
Quentin
David Laight Nov. 15, 2022, 8:47 a.m. UTC | #2
From: Victor Dumas
> Sent: 20 October 2022 12:55
> 
> Using "xargs" instead of "while read" loop allows for the patching of files to be parallelized
> This significantly reduces the amount of time it takes to fix all the paths.
> On a larger RFS(~300MB) this script was taking 5 minutes, it now only takes about 30s on a 12 core
> machine
...
> +    # Limit the number of cores used
> +    procs=$(echo -e "$(($(nproc)-2)) \n 1" | sort -n | tail -n1)
> +    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${procs} -I {} bash -c "patch_file
> '${PATCHELF}' '${rootdir}' '${sanitize_extra_args}' $@" _ {}

Are you sure that all versions of xargs the script needs to
run on support -P ?

Having xargs run 'bash -c "command" ...' and the quoting also looks odd.
I presume there is some reason xargs can't just run patrch_file.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Quentin Schulz Nov. 15, 2022, 9:24 a.m. UTC | #3
Hi David,

On 11/15/22 09:47, David Laight wrote:
> From: Victor Dumas
>> Sent: 20 October 2022 12:55
>>
>> Using "xargs" instead of "while read" loop allows for the patching of files to be parallelized
>> This significantly reduces the amount of time it takes to fix all the paths.
>> On a larger RFS(~300MB) this script was taking 5 minutes, it now only takes about 30s on a 12 core
>> machine
> ...
>> +    # Limit the number of cores used
>> +    procs=$(echo -e "$(($(nproc)-2)) \n 1" | sort -n | tail -n1)
>> +    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${procs} -I {} bash -c "patch_file
>> '${PATCHELF}' '${rootdir}' '${sanitize_extra_args}' $@" _ {}
> 
> Are you sure that all versions of xargs the script needs to
> run on support -P ?
> 

--max-procs was part of the initial commit of xargs in findutils in 
1996, c.f.:
https://git.savannah.gnu.org/cgit/findutils.git/commit/?id=c030b5ee33bbec3c93cddc3ca9ebec14c24dbe07

It is also supported in its initial commit in Busybox:
https://git.busybox.net/busybox/commit/?id=92a61c1206572f4a6e55baa24e7cdd4f180d4b64

The only question is about BSD support (since pkgs.org returns that all 
other distros are using find-utils/busybox basically) since they are 
using something else (toybox, heirloom, ...?).

Do we actually have a list of operating systems that are "officially" 
supported?

> Having xargs run 'bash -c "command" ...' and the quoting also looks odd.
> I presume there is some reason xargs can't just run patrch_file.
> 

IIRC, you can't call a shell function from xargs directly.

Cheers,
Quentin

> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 
> _______________________________________________
> buildroot mailing list
> buildroot@buildroot.org
> https://urldefense.com/v3/__https://lists.buildroot.org/mailman/listinfo/buildroot__;!!OOPJP91ZZw!mrCuxJUbX9qAxmtZDc6047Tt9_CuDKxXgPQu86pqCMlg_9auvbbJCmj2U4UdX_OnaO0U9ecmUu0-uHnkJjFiEXoSX0FyA8U_YtvT0A$
Gleb Mazovetskiy Nov. 17, 2022, 1:21 p.m. UTC | #4
Rather than doing it via a shell script and xargs -P, consider doing
parallelization in a submake, similar to how we handle parallel glibc
locale generation:
https://github.com/buildroot/buildroot/blob/master/support/misc/gen-glibc-locales.mk
https://github.com/buildroot/buildroot/blob/bc9b716296f37e0e3e47fd34c8991e92b6baeebd/Makefile#L655-L656

This approach will reuse the make job server.

You'd need to assign the results of the find to a variable and then create
a target for each of the files using something like

$(foreach file,$(FILES),$(eval $(call make-fix-rpath-target,$(file))))


On Tue, Nov 15, 2022 at 9:24 AM Quentin Schulz via buildroot <
buildroot@buildroot.org> wrote:

> Hi David,
>
> On 11/15/22 09:47, David Laight wrote:
> > From: Victor Dumas
> >> Sent: 20 October 2022 12:55
> >>
> >> Using "xargs" instead of "while read" loop allows for the patching of
> files to be parallelized
> >> This significantly reduces the amount of time it takes to fix all the
> paths.
> >> On a larger RFS(~300MB) this script was taking 5 minutes, it now only
> takes about 30s on a 12 core
> >> machine
> > ...
> >> +    # Limit the number of cores used
> >> +    procs=$(echo -e "$(($(nproc)-2)) \n 1" | sort -n | tail -n1)
> >> +    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${procs} -I {}
> bash -c "patch_file
> >> '${PATCHELF}' '${rootdir}' '${sanitize_extra_args}' $@" _ {}
> >
> > Are you sure that all versions of xargs the script needs to
> > run on support -P ?
> >
>
> --max-procs was part of the initial commit of xargs in findutils in
> 1996, c.f.:
>
> https://git.savannah.gnu.org/cgit/findutils.git/commit/?id=c030b5ee33bbec3c93cddc3ca9ebec14c24dbe07
>
> It is also supported in its initial commit in Busybox:
>
> https://git.busybox.net/busybox/commit/?id=92a61c1206572f4a6e55baa24e7cdd4f180d4b64
>
> The only question is about BSD support (since pkgs.org returns that all
> other distros are using find-utils/busybox basically) since they are
> using something else (toybox, heirloom, ...?).
>
> Do we actually have a list of operating systems that are "officially"
> supported?
>
> > Having xargs run 'bash -c "command" ...' and the quoting also looks odd.
> > I presume there is some reason xargs can't just run patrch_file.
> >
>
> IIRC, you can't call a shell function from xargs directly.
>
> Cheers,
> Quentin
>
> >       David
> >
> > -
> > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes,
> MK1 1PT, UK
> > Registration No: 1397386 (Wales)
> >
> > _______________________________________________
> > buildroot mailing list
> > buildroot@buildroot.org
> >
> https://urldefense.com/v3/__https://lists.buildroot.org/mailman/listinfo/buildroot__;!!OOPJP91ZZw!mrCuxJUbX9qAxmtZDc6047Tt9_CuDKxXgPQu86pqCMlg_9auvbbJCmj2U4UdX_OnaO0U9ecmUu0-uHnkJjFiEXoSX0FyA8U_YtvT0A$
> _______________________________________________
> buildroot mailing list
> buildroot@buildroot.org
> https://lists.buildroot.org/mailman/listinfo/buildroot
>
diff mbox series

Patch

diff --git a/support/scripts/fix-rpath b/support/scripts/fix-rpath
index 3e67e770e5..cef7a48e76 100755
--- a/support/scripts/fix-rpath
+++ b/support/scripts/fix-rpath
@@ -58,6 +58,38 @@  HOST_EXCLUDEPATHS="/share/terminfo"
 STAGING_EXCLUDEPATHS="/usr/include /usr/share/terminfo"
 TARGET_EXCLUDEPATHS="/lib/firmware"
 
+patch_file() {
+    PATCHELF="${1}"
+    rootdir="${2}"
+    sanitize_extra_args="${3}"
+    file="${4}"
+
+    # check if it's an ELF file
+    rpath=$(${PATCHELF} --print-rpath "${file}" 2>&1)
+    if test $? -ne 0 ; then
+        return 0
+    fi
+
+    # make files writable if necessary
+    changed=$(chmod -c u+w "${file}")
+
+    # With per-package directory support, most RPATH of host
+    # binaries will point to per-package directories. This won't
+    # work with the --make-rpath-relative ${rootdir} invocation as
+    # the per-package host directory is not within ${rootdir}. So,
+    # we rewrite all RPATHs pointing to per-package directories so
+    # that they point to the global host directry.
+    changed_rpath=$(echo ${rpath} | sed "s@${PER_PACKAGE_DIR}/[^/]\+/host@${HOST_DIR}@")
+    if test "${rpath}" != "${changed_rpath}" ; then
+        ${PATCHELF} --set-rpath ${changed_rpath} "${file}"
+    fi
+
+    # call patchelf to sanitize the rpath
+    ${PATCHELF} --make-rpath-relative "${rootdir}" ${sanitize_extra_args[@]} "${file}"
+    # restore the original permission
+    test "${changed}" != "" && chmod u-w "${file}"
+}
+
 main() {
     local rootdir
     local tree="${1}"
@@ -123,34 +155,12 @@  main() {
             ;;
     esac
 
-    find_args+=( "-type" "f" "-print" )
-
-    while read file ; do
-        # check if it's an ELF file
-        rpath=$(${PATCHELF} --print-rpath "${file}" 2>&1)
-        if test $? -ne 0 ; then
-            continue
-        fi
-
-        # make files writable if necessary
-        changed=$(chmod -c u+w "${file}")
-
-        # With per-package directory support, most RPATH of host
-        # binaries will point to per-package directories. This won't
-        # work with the --make-rpath-relative ${rootdir} invocation as
-        # the per-package host directory is not within ${rootdir}. So,
-        # we rewrite all RPATHs pointing to per-package directories so
-        # that they point to the global host directry.
-        changed_rpath=$(echo ${rpath} | sed "s@${PER_PACKAGE_DIR}/[^/]\+/host@${HOST_DIR}@")
-        if test "${rpath}" != "${changed_rpath}" ; then
-            ${PATCHELF} --set-rpath ${changed_rpath} "${file}"
-        fi
-
-        # call patchelf to sanitize the rpath
-        ${PATCHELF} --make-rpath-relative "${rootdir}" ${sanitize_extra_args[@]} "${file}"
-        # restore the original permission
-        test "${changed}" != "" && chmod u-w "${file}"
-    done < <(find "${rootdir}" ${find_args[@]})
+    find_args+=( "-type" "f" "-print0" )
+
+    export -f patch_file
+    # Limit the number of cores used
+    procs=$(echo -e "$(($(nproc)-2)) \n 1" | sort -n | tail -n1)
+    find "${rootdir}" ${find_args[@]} | xargs -0 -r -P ${procs} -I {} bash -c "patch_file '${PATCHELF}' '${rootdir}' '${sanitize_extra_args}' $@" _ {}
 
     # Restore patched patchelf utility
     test "${tree}" = "host" && mv "${PATCHELF}.__to_be_patched" "${PATCHELF}"