diff mbox series

[1/3] base-files: sysupgrade: add tar.sh with helpers for building archives

Message ID 20240226141413.5570-1-zajec5@gmail.com
State Changes Requested
Delegated to: Rafał Miłecki
Headers show
Series [1/3] base-files: sysupgrade: add tar.sh with helpers for building archives | expand

Commit Message

Rafał Miłecki Feb. 26, 2024, 2:14 p.m. UTC
From: Jo-Philipp Wich <jo@mein.io>

This allows building uncompressed tar archives from shell scripts (and
compressing them later if needed)

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
---
 package/base-files/files/lib/upgrade/tar.sh | 84 +++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 package/base-files/files/lib/upgrade/tar.sh

Comments

Paul D Feb. 26, 2024, 9:27 p.m. UTC | #1
What tar standard are you aiming to adhere to?



On 2024-02-26 15:14, Rafał Miłecki wrote:
> From: Jo-Philipp Wich <jo@mein.io>
> 
> This allows building uncompressed tar archives from shell scripts (and
> compressing them later if needed)
> 
> Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
> ---
>   package/base-files/files/lib/upgrade/tar.sh | 84 +++++++++++++++++++++
>   1 file changed, 84 insertions(+)
>   create mode 100644 package/base-files/files/lib/upgrade/tar.sh
> 
> diff --git a/package/base-files/files/lib/upgrade/tar.sh b/package/base-files/files/lib/upgrade/tar.sh
> new file mode 100644
> index 0000000000..00057dd760
> --- /dev/null
> +++ b/package/base-files/files/lib/upgrade/tar.sh
> @@ -0,0 +1,84 @@

No shebang?

> +# SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +
> +__tar_print_padding() {
> +	[ $1 -eq 0 ] || dd if=/dev/zero bs=$1 count=1 2>/dev/null
> +}
> +
> +__tar_make_member() {
> +	local name="$1"
> +	local content="$2"
> +	local username="$3"
> +	local groupname="$4"
> +	local mtime="$5"
> +	local mode=644
> +	local uid=0
> +	local gid=0
> +	local size=${#content}
> +	local type=0
> +	local link=""
> +

recommend that they're ordered here same as struct order:

struct posix_header
{                              /* byte offset */
   char name[100];               /*   0 */
   char mode[8];                 /* 100 */
   char uid[8];                  /* 108 */
   char gid[8];                  /* 116 */
   char size[12];                /* 124 */
   char mtime[12];               /* 136 */
   char chksum[8];               /* 148 */
   char typeflag;                /* 156 */
   char linkname[100];           /* 157 */
   char magic[6];                /* 257 */
   char version[2];              /* 263 */
   char uname[32];               /* 265 */
   char gname[32];               /* 297 */
   char devmajor[8];             /* 329 */
   char devminor[8];             /* 337 */
   char prefix[155];             /* 345 */
                                 /* 500 */
};


> +	# 100 byte of padding bytes, using 0x01 since the shell does not tolate null bytes in strings

tolerate?


> +	local pad=$'\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1'
> +

maybe try:

local pad=$(printf '\1%.0s' $(seq 100))

> +	# validate name
> +	if [ "${name:0:1}" = "/" ]; then
> +		name="${name:1}"
> +	fi

One liner:

name=${name#/}

> +
> +	# truncate string header values to their maximum length
> +	name=${name:0:100}
> +	link=${link:0:100}
> +	username=${username:0:32}
> +	groupname=${groupname:0:32}
> +
> +	# construct header part before checksum field
> +	local header1="${name}${pad:0:$((100 - ${#name}))}"
> +	header1="${header1}$(printf '%07d\1' $mode)"
> +	header1="${header1}$(printf '%07o\1' $uid)"
> +	header1="${header1}$(printf '%07o\1' $gid)"
> +	header1="${header1}$(printf '%011o\1' $size)"
> +	header1="${header1}$(printf '%011o\1' $mtime)"
> +
> +	# construct header part after checksum field
> +	local header2="$(printf '%d' $type)"
> +	header2="${header2}${link}${pad:0:$((100 - ${#link}))}"
> +	header2="${header2}ustar  ${pad:0:1}"
> +	header2="${header2}${username}${pad:0:$((32 - ${#username}))}"
> +	header2="${header2}${groupname}${pad:0:$((32 - ${#groupname}))}"
> +
> +	# calculate checksum over header fields
> +	local checksum=0
> +	for byte in $(printf '%s%8s%s' "$header1" "" "$header2" | tr '\1' '\0' | hexdump -ve '1/1 "%u "'); do
> +		checksum=$((checksum + byte))
> +	done
> +
> +	# print member header, padded to 512 byte
> +	printf '%s%06o\0 %s' "$header1" $checksum "$header2" | tr '\1' '\0'
> +	__tar_print_padding 183
> +
> +	# print content data, padded to multiple of 512 byte
> +	printf "%s" "$content"
> +	__tar_print_padding $((512 - (size % 512)))
> +}
> +
> +tar_make_member_from_file() {
> +	local name="$1"
> +	local username="$(ls -l "$1" | tr -s ' ' | cut -d ' ' -f 3)"
> +	local groupname="$(ls -l "$1" | tr -s ' ' | cut -d ' ' -f 4)"
> +
> +	__tar_make_member "$name" "$(cat $name)" "$username" "$groupname" "$(date +%s -r "$1")"
> +}
> +
> +tar_make_member_inline() {
> +	local name="$1"
> +	local content="$2"
> +	local username="${3:-root}"
> +	local groupname="${4:-root}"
> +	local mtime="${5:-$(date +%s)}"
> +
> +	__tar_make_member "$name" "$content" "$username" "$groupname" "$mtime"
> +}
> +
> +tar_close() {
> +	__tar_print_padding 1024
> +}
Jo-Philipp Wich Feb. 27, 2024, 9:41 p.m. UTC | #2
Hi Rafał,

thanks for taking care of this. Please find some comments below.

Am 2/26/24 um 15:14 schrieb Rafał Miłecki:
> From: Jo-Philipp Wich <jo@mein.io>
> 
> This allows building uncompressed tar archives from shell scripts (and
> compressing them later if needed)
> 
> Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
> ---
>   package/base-files/files/lib/upgrade/tar.sh | 84 +++++++++++++++++++++
>   1 file changed, 84 insertions(+)
>   create mode 100644 package/base-files/files/lib/upgrade/tar.sh
> 
> diff --git a/package/base-files/files/lib/upgrade/tar.sh b/package/base-files/files/lib/upgrade/tar.sh
> new file mode 100644
> index 0000000000..00057dd760
> --- /dev/null
> +++ b/package/base-files/files/lib/upgrade/tar.sh
> @@ -0,0 +1,84 @@
> +# SPDX-License-Identifier: GPL-2.0-or-later OR MIT
> +
> +__tar_print_padding() {
> +	[ $1 -eq 0 ] || dd if=/dev/zero bs=$1 count=1 2>/dev/null
> +}
> +
> +__tar_make_member() {
> +	local name="$1"
> +	local content="$2"
> +	local username="$3"
> +	local groupname="$4"
> +	local mtime="$5"
> +	local mode=644

I think the uid and gid values should correspond to the given username 
and groupname values. Something like this would probably work:

local uid=$(id -u "$username")
local gid=$(sed -rne "s#^$groupname:[^:]*:([0-9]+):.*\$#\1#p" /etc/group)

> +	local uid=0
> +	local gid=0
> +	local size=${#content}
> +	local type=0
> +	local link=""
> +
> +	# 100 byte of padding bytes, using 0x01 since the shell does not tolate null bytes in strings
> +	local pad=$'\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1'
> +
> +	# validate name
> +	if [ "${name:0:1}" = "/" ]; then
> +		name="${name:1}"
> +	fi
> +
> +	# truncate string header values to their maximum length
> +	name=${name:0:100}
> +	link=${link:0:100}
> +	username=${username:0:32}
> +	groupname=${groupname:0:32}
> +
> +	# construct header part before checksum field
> +	local header1="${name}${pad:0:$((100 - ${#name}))}"
> +	header1="${header1}$(printf '%07d\1' $mode)"
> +	header1="${header1}$(printf '%07o\1' $uid)"
> +	header1="${header1}$(printf '%07o\1' $gid)"
> +	header1="${header1}$(printf '%011o\1' $size)"
> +	header1="${header1}$(printf '%011o\1' $mtime)"
> +
> +	# construct header part after checksum field
> +	local header2="$(printf '%d' $type)"
> +	header2="${header2}${link}${pad:0:$((100 - ${#link}))}"
> +	header2="${header2}ustar  ${pad:0:1}"
> +	header2="${header2}${username}${pad:0:$((32 - ${#username}))}"
> +	header2="${header2}${groupname}${pad:0:$((32 - ${#groupname}))}"
> +
> +	# calculate checksum over header fields
> +	local checksum=0
> +	for byte in $(printf '%s%8s%s' "$header1" "" "$header2" | tr '\1' '\0' | hexdump -ve '1/1 "%u "'); do
> +		checksum=$((checksum + byte))
> +	done
> +
> +	# print member header, padded to 512 byte
> +	printf '%s%06o\0 %s' "$header1" $checksum "$header2" | tr '\1' '\0'
> +	__tar_print_padding 183

I checked and noticed that `dd` accepts a `count` value of 0, so we can 
inline `__tar_print_padding()` (whose sole purpose was the != 0 check) 
and get rid of the extra function:

dd if=/dev/zero bs=183 count=1 2>/dev/null

> +
> +	# print content data, padded to multiple of 512 byte
> +	printf "%s" "$content"
> +	__tar_print_padding $((512 - (size % 512)))

Inline this `__tar_print_padding()` as (count may be zero):

dd if=/dev/zero bs=1 count=$((512 - (size % 512))) 2>/dev/null

> +}
> +
> +tar_make_member_from_file() {
> +	local name="$1"
> +	local username="$(ls -l "$1" | tr -s ' ' | cut -d ' ' -f 3)"
> +	local groupname="$(ls -l "$1" | tr -s ' ' | cut -d ' ' -f 4)"
> +
> +	__tar_make_member "$name" "$(cat $name)" "$username" "$groupname" "$(date +%s -r "$1")"
> +}
> +
> +tar_make_member_inline() {
> +	local name="$1"
> +	local content="$2"
> +	local username="${3:-root}"
> +	local groupname="${4:-root}"
> +	local mtime="${5:-$(date +%s)}"
> +
> +	__tar_make_member "$name" "$content" "$username" "$groupname" "$mtime"
> +}
> +
> +tar_close() {
> +	__tar_print_padding 1024

Inline this `__tar_print_padding()` as:

dd if=/dev/zero bs=1024 count=1 2>/dev/null

> +}


~ Jo
Eric Feb. 27, 2024, 11:46 p.m. UTC | #3
The sender domain has a DMARC Reject/Quarantine policy which disallows
sending mailing list messages using the original "From" header.

To mitigate this problem, the original message has been wrapped
automatically by the mailing list software.
On Tuesday, February 27th, 2024 at 13:41, Jo-Philipp Wich <jo@mein.io> wrote:
> I think the uid and gid values should correspond to the given username
> and groupname values. Something like this would probably work:
> 
> local uid=$(id -u "$username")
> local gid=$(sed -rne "s#^$groupname:[^:]*:([0-9]+):.*\$#\1#p" /etc/group)

Does this do the same thing?

local gid=$(uid -g "$groupname")

Eric
Eric Feb. 28, 2024, 12:11 a.m. UTC | #4
The sender domain has a DMARC Reject/Quarantine policy which disallows
sending mailing list messages using the original "From" header.

To mitigate this problem, the original message has been wrapped
automatically by the mailing list software.
Sent with Proton Mail secure email.

On Tuesday, February 27th, 2024 at 15:46, Eric <evil.function@proton.me> wrote:

> On Tuesday, February 27th, 2024 at 13:41, Jo-Philipp Wich jo@mein.io wrote:
> 
> > I think the uid and gid values should correspond to the given username
> > and groupname values. Something like this would probably work:
> > 
> > local uid=$(id -u "$username")
> > local gid=$(sed -rne "s#^$groupname:[^:]*:([0-9]+):.*\$#\1#p" /etc/group)
> 
> 
> Does this do the same thing?
> 
> local gid=$(uid -g "$groupname")

OOPS typo, 'id -g ...'
Rafał Miłecki Feb. 28, 2024, 7:29 a.m. UTC | #5
On 26.02.2024 22:27, Paul D wrote:
>> diff --git a/package/base-files/files/lib/upgrade/tar.sh b/package/base-files/files/lib/upgrade/tar.sh
>> new file mode 100644
>> index 0000000000..00057dd760
>> --- /dev/null
>> +++ b/package/base-files/files/lib/upgrade/tar.sh
>> @@ -0,0 +1,84 @@
> 
> No shebang?

Files /lib/upgrade/*.sh are for including.


>> +# SPDX-License-Identifier: GPL-2.0-or-later OR MIT
>> +
>> +__tar_print_padding() {
>> +    [ $1 -eq 0 ] || dd if=/dev/zero bs=$1 count=1 2>/dev/null
>> +}
>> +
>> +__tar_make_member() {
>> +    local name="$1"
>> +    local content="$2"
>> +    local username="$3"
>> +    local groupname="$4"
>> +    local mtime="$5"
>> +    local mode=644
>> +    local uid=0
>> +    local gid=0
>> +    local size=${#content}
>> +    local type=0
>> +    local link=""
>> +
> 
> recommend that they're ordered here same as struct order:
> 
> struct posix_header
> {                              /* byte offset */
>    char name[100];               /*   0 */
>    char mode[8];                 /* 100 */
>    char uid[8];                  /* 108 */
>    char gid[8];                  /* 116 */
>    char size[12];                /* 124 */
>    char mtime[12];               /* 136 */
>    char chksum[8];               /* 148 */
>    char typeflag;                /* 156 */
>    char linkname[100];           /* 157 */
>    char magic[6];                /* 257 */
>    char version[2];              /* 263 */
>    char uname[32];               /* 265 */
>    char gname[32];               /* 297 */
>    char devmajor[8];             /* 329 */
>    char devminor[8];             /* 337 */
>    char prefix[155];             /* 345 */
>                                  /* 500 */
> };

I'm not sure about that. First and foremost, I want the code to be easy to
understand and maintain. Using an unnatural order (such as mixing up the
order of argument variables and local variables) would be confusing.


>> +    local pad=$'\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1'
>> +
> 
> maybe try:
> 
> local pad=$(printf '\1%.0s' $(seq 100))

They produce the same result I believe:

# echo $'\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1' | md5sum
59c89925a4ef5bee948db1ec5dc9a4c4  -

# echo $(printf '\1%.0s' $(seq 100)) | md5sum
59c89925a4ef5bee948db1ec5dc9a4c4  -

The first is longer; however, it avoids two subshell executions. I don't
think there's a single winner here.
diff mbox series

Patch

diff --git a/package/base-files/files/lib/upgrade/tar.sh b/package/base-files/files/lib/upgrade/tar.sh
new file mode 100644
index 0000000000..00057dd760
--- /dev/null
+++ b/package/base-files/files/lib/upgrade/tar.sh
@@ -0,0 +1,84 @@ 
+# SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+
+__tar_print_padding() {
+	[ $1 -eq 0 ] || dd if=/dev/zero bs=$1 count=1 2>/dev/null
+}
+
+__tar_make_member() {
+	local name="$1"
+	local content="$2"
+	local username="$3"
+	local groupname="$4"
+	local mtime="$5"
+	local mode=644
+	local uid=0
+	local gid=0
+	local size=${#content}
+	local type=0
+	local link=""
+
+	# 100 byte of padding bytes, using 0x01 since the shell does not tolate null bytes in strings
+	local pad=$'\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1'
+
+	# validate name
+	if [ "${name:0:1}" = "/" ]; then
+		name="${name:1}"
+	fi
+
+	# truncate string header values to their maximum length
+	name=${name:0:100}
+	link=${link:0:100}
+	username=${username:0:32}
+	groupname=${groupname:0:32}
+
+	# construct header part before checksum field
+	local header1="${name}${pad:0:$((100 - ${#name}))}"
+	header1="${header1}$(printf '%07d\1' $mode)"
+	header1="${header1}$(printf '%07o\1' $uid)"
+	header1="${header1}$(printf '%07o\1' $gid)"
+	header1="${header1}$(printf '%011o\1' $size)"
+	header1="${header1}$(printf '%011o\1' $mtime)"
+
+	# construct header part after checksum field
+	local header2="$(printf '%d' $type)"
+	header2="${header2}${link}${pad:0:$((100 - ${#link}))}"
+	header2="${header2}ustar  ${pad:0:1}"
+	header2="${header2}${username}${pad:0:$((32 - ${#username}))}"
+	header2="${header2}${groupname}${pad:0:$((32 - ${#groupname}))}"
+
+	# calculate checksum over header fields
+	local checksum=0
+	for byte in $(printf '%s%8s%s' "$header1" "" "$header2" | tr '\1' '\0' | hexdump -ve '1/1 "%u "'); do
+		checksum=$((checksum + byte))
+	done
+
+	# print member header, padded to 512 byte
+	printf '%s%06o\0 %s' "$header1" $checksum "$header2" | tr '\1' '\0'
+	__tar_print_padding 183
+
+	# print content data, padded to multiple of 512 byte
+	printf "%s" "$content"
+	__tar_print_padding $((512 - (size % 512)))
+}
+
+tar_make_member_from_file() {
+	local name="$1"
+	local username="$(ls -l "$1" | tr -s ' ' | cut -d ' ' -f 3)"
+	local groupname="$(ls -l "$1" | tr -s ' ' | cut -d ' ' -f 4)"
+
+	__tar_make_member "$name" "$(cat $name)" "$username" "$groupname" "$(date +%s -r "$1")"
+}
+
+tar_make_member_inline() {
+	local name="$1"
+	local content="$2"
+	local username="${3:-root}"
+	local groupname="${4:-root}"
+	local mtime="${5:-$(date +%s)}"
+
+	__tar_make_member "$name" "$content" "$username" "$groupname" "$mtime"
+}
+
+tar_close() {
+	__tar_print_padding 1024
+}