diff mbox series

[v9,2/5] Add script to generate arch(s) dependant syscalls

Message ID 20241101-generate_syscalls-v9-2-d2c3820b0323@suse.com
State Accepted
Headers show
Series Automatically generate syscalls.h | expand

Commit Message

Andrea Cervesato Nov. 1, 2024, 10:48 a.m. UTC
From: Andrea Cervesato <andrea.cervesato@suse.com>

Add the generate_arch.sh script, which can be used to generate the
architecture-dependent syscalls files. The way it works is pretty simple:
for each architecture defined in supported-arch.txt, compile the kernel
headers, extract the list of syscalls and generate a .in file containing
all of them, each associated with its own syscall number.
The syscalls files are generated through a C application, which
automatically checks the availability of the syscalls in the user space
environment.

Reviewed-by: Li Wang <liwang@redhat.com>
Reviewed-by: Cyril Hrubis <chrubis@suse.cz>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Andrea Cervesato <andrea.cervesato@suse.com>
---
 include/lapi/syscalls/generate_arch.sh | 213 +++++++++++++++++++++++++++++++++
 1 file changed, 213 insertions(+)

Comments

Petr Vorel Nov. 1, 2024, 11:30 a.m. UTC | #1
Hi Andrea,

...
> +		for syscall in $(cat ${TEMP}/syscall-names.txt); do
> +			printf "
> +		#ifdef __NR_$syscall
> +			printf(\"$syscall %%d"
> +			# i know the following print is ugly, but dash and bash
> +			# treat double quoted strings in a different way and we
> +			# really need to inject '\n' character in the C code
> +			# rather than carriage return
> +			printf '\\n'
> +			printf "\", __NR_$syscall);

How about using heredocs? IMHO they are a more compatible way than
echo/printf "" to handle new lines:
cat << EOF
...
EOF

I should have noted that in the previous version.
Hint: looking into tst_test.sh can sometimes help.

There is also a variant that behaves like echo/printf '' (nothing is expanded):
cat << 'EOF'
EOF
https://unix.stackexchange.com/questions/462593/how-to-escape-a-character-in-a-heredoc-bash-script/462595#462595

There is also <<-EOF, which strips leading tabs (not spaces), even before the terminating EOF.

Kind regards,
Petr
Andrea Cervesato Nov. 1, 2024, 1:23 p.m. UTC | #2
Hi,

On 11/1/24 12:30, Petr Vorel wrote:
> Hi Andrea,
>
> ...
>> +		for syscall in $(cat ${TEMP}/syscall-names.txt); do
>> +			printf "
>> +		#ifdef __NR_$syscall
>> +			printf(\"$syscall %%d"
>> +			# i know the following print is ugly, but dash and bash
>> +			# treat double quoted strings in a different way and we
>> +			# really need to inject '\n' character in the C code
>> +			# rather than carriage return
>> +			printf '\\n'
>> +			printf "\", __NR_$syscall);
> How about using heredocs? IMHO more compatible way of echo/printf "" for new
> lines:
> cat << EOF
> ...
> EOF
>
> I should have noted that at previous version.
> Hint: looking into tst_test.sh can sometimes help.
>
> Also there is echo/printf '' variant:
> cat << 'EOF'
> EOF
> https://unix.stackexchange.com/questions/462593/how-to-escape-a-character-in-a-heredoc-bash-script/462595#462595
>
> There is also <<-EOF, which strips leading tabs (not spaces), even before the terminating EOF.
>
> Kind regards,
> Petr

This method is so ugly: basically the ending EOF must be in the first
column of the next line, otherwise it won't be parsed. And this of course
breaks readability (and probably makes debugging harder). I would really
avoid going this way.

Andrea
Petr Vorel Nov. 1, 2024, 5:02 p.m. UTC | #3
> Hi,

> On 11/1/24 12:30, Petr Vorel wrote:
> > Hi Andrea,

> > ...
> > > +		for syscall in $(cat ${TEMP}/syscall-names.txt); do
> > > +			printf "
> > > +		#ifdef __NR_$syscall
> > > +			printf(\"$syscall %%d"
> > > +			# i know the following print is ugly, but dash and bash
> > > +			# treat double quoted strings in a different way and we
> > > +			# really need to inject '\n' character in the C code
> > > +			# rather than carriage return
> > > +			printf '\\n'
> > > +			printf "\", __NR_$syscall);
> > How about using heredocs? IMHO more compatible way of echo/printf "" for new
> > lines:
> > cat << EOF
> > ...
> > EOF

> > I should have noted that at previous version.
> > Hint: looking into tst_test.sh can sometimes help.

> > Also there is echo/printf '' variant:
> > cat << 'EOF'
> > EOF
> > https://unix.stackexchange.com/questions/462593/how-to-escape-a-character-in-a-heredoc-bash-script/462595#462595

> > There is also <<-EOF, which strips leading tabs (not spaces), even before the terminating EOF.

> > Kind regards,
> > Petr

> This method is so ugly: basically the ending EOF must be in the first column
> of the next line, otherwise it won't be parsed. And this breaks readability
> of course (probably some debugging). I really would avoid this way..

I would say a heredoc (EOF) is quite a standard way of using shell, and it
allows you to use \n unescaped:

syscall="XXX"
cat > foo.txt <<EOF
	printf(\"$syscall %%d"
	printf '\n'
EOF

prints into foo.txt:
	printf(\"XXX %%d"
	printf '\n'

But sure, it's just a suggestion based on your comment.

Kind regards,
Petr

> Andrea
diff mbox series

Patch

diff --git a/include/lapi/syscalls/generate_arch.sh b/include/lapi/syscalls/generate_arch.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5d731794d6492a76c3ba39ac0117b60d8a374740
--- /dev/null
+++ b/include/lapi/syscalls/generate_arch.sh
@@ -0,0 +1,213 @@ 
+#!/bin/sh -eu
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) Linux Test Project, 2009-2024
+# Copyright (c) Marcin Juszkiewicz, 2023-2024
+#
+# This is an adaptation of the update-tables.sh script, included in the
+# syscalls-table project (https://github.com/hrw/syscalls-table) and released
+# under the MIT license.
+#
+# Author: Andrea Cervesato <andrea.cervesato@suse.com>
+
+# The "-eu" given on the shebang line is dropped when the script is started
+# as "sh generate_arch.sh", so enable strict mode explicitly as well.
+set -eu
+
+# Usage: generate_arch.sh path/to/Linux/kernel/sources
+if [ "$#" -eq 0 ]; then
+	echo "Please provide kernel sources:" >&2
+	echo "" >&2
+	echo "$0 path/to/Linux/kernel/sources" >&2
+	echo "" >&2
+	exit 1
+fi
+
+KERNELSRC="$1"
+
+# to keep sorting in order
+export LC_ALL=C
+
+if [ ! -d "${KERNELSRC}" ]; then
+	echo "${KERNELSRC} is not a directory" >&2
+	exit 1
+fi
+
+if [ ! -e "${KERNELSRC}/Makefile" ]; then
+	echo "No Makefile in ${KERNELSRC} directory" >&2
+	exit 1
+fi
+
+# Scratch directory for every intermediate file; it is not removed by this
+# script and its path is printed before the tables are generated.
+TEMP="$(mktemp -d)"
+# KVER is not referenced elsewhere in this script; under "set -e" a failing
+# "make kernelversion" still aborts the run at this point.
+KVER="$(make -C "${KERNELSRC}" kernelversion -s)"
+
+SCRIPT_DIR="$(realpath "$(dirname "$0")")"
+SUPPORTED_ARCH="${SCRIPT_DIR}/supported-arch.txt"
+LINUX_HEADERS="${TEMP}/headers"
+
+grab_syscall_names_from_tables() {
+	for tbl_file in $(find ${KERNELSRC}/arch -name syscall*.tbl); do
+		grep -E -v "(^#|^$|sys_ni_syscall)" $tbl_file |
+			awk '{ print $3 }' >>${TEMP}/syscall-names.tosort
+	done
+
+	drop_bad_entries
+}
+
+grab_syscall_names_from_unistd_h() {
+	grep -E -h "^#define __NR_" \
+		${LINUX_HEADERS}/usr/include/asm/unistd*.h \
+		${LINUX_HEADERS}/usr/include/asm-generic/unistd.h \
+		>${TEMP}/syscall-names.tosort
+
+	drop_bad_entries
+}
+
+# Turn ${TEMP}/syscall-names.tosort into ${TEMP}/syscall-names.txt: lines
+# matching one of the helper/marker patterns below are removed, a leading
+# "#define __NR_" and everything after the first blank are stripped, and the
+# result is sorted with duplicates removed.
+drop_bad_entries() {
+	# [[:space:]] is used instead of \s, which is a GNU sed extension.
+	grep -E -v "(unistd.h|NR3264|__NR_syscall|__SC_COMP|__NR_.*Linux|__NR_FAST)" \
+		"${TEMP}/syscall-names.tosort" |
+		grep -E -v "(__SYSCALL|SYSCALL_BASE|SYSCALL_MASK)" |
+		sed -e 's/#define[[:space:]]*__NR_//g' -e 's/[[:space:]].*//g' |
+		sort -u >"${TEMP}/syscall-names.txt"
+}
+
+generate_table() {
+	echo "- $arch"
+
+	if [ "$bits" -eq "32" ]; then
+		extraflags="${extraflags} -D__BITS_PER_LONG=32"
+	fi
+
+	local uppercase_arch=$(echo "$arch" | tr '[:lower:]' '[:upper:]')
+
+	# ignore any error generated by gcc. We want to obtain all the
+	# available architecture syscalls for the current platform and to handle
+	# only supported architectures later on
+	gcc ${TEMP}/list-syscalls.c -U__LP64__ -U__ILP32__ -U__i386__ \
+		-D${uppercase_arch} \
+		-D__${arch}__ ${extraflags} \
+		-I ${LINUX_HEADERS}/usr/include/ \
+		-o ${TEMP}/list-syscalls || true
+
+	${TEMP}/list-syscalls >"${TEMP}/${arch}.in.tosort"
+
+	sort -k2,2n "${TEMP}/${arch}.in.tosort" >"${TEMP}/${arch}.in"
+}
+
+# Write ${TEMP}/list-syscalls.c: a C program that, for every name listed in
+# ${TEMP}/syscall-names.txt, prints "<name> <number>" on its own line when
+# the matching __NR_<name> macro is defined at compile time.
+generate_list_syscalls_c() {
+	(
+		printf '%s\n' \
+			'#include <stdio.h>' \
+			'#include <asm/unistd.h>' \
+			'' \
+			'int main(void)' \
+			'{'
+
+		# The format is single-quoted and the name is passed as an
+		# argument, so dash and bash emit the same text: '\\n' becomes a
+		# literal backslash-n inside the C string, '\n' a real line break
+		# in the generated source.
+		while IFS= read -r syscall; do
+			[ -n "$syscall" ] || continue
+			printf '#ifdef __NR_%s\n\tprintf("%s %%d\\n", __NR_%s);\n#endif\n' \
+				"$syscall" "$syscall" "$syscall"
+		done <"${TEMP}/syscall-names.txt"
+
+		printf '%s\n' '	return 0;' '}'
+	) >"${TEMP}/list-syscalls.c"
+}
+
+# Run the kernel "headers_install" target for the architecture held in the
+# global arch, with ${LINUX_HEADERS} as the output directory. All output of
+# make is discarded.
+export_headers() {
+	make -s -C "${KERNELSRC}" ARCH="${arch}" O="${LINUX_HEADERS}" \
+		headers_install >/dev/null 2>&1
+}
+
+do_all_tables() {
+	for archdir in ${KERNELSRC}/arch/*; do
+		arch=$(basename $archdir)
+
+		bits=64
+		extraflags=
+
+		case ${arch} in
+		Kconfig)
+			continue
+			;;
+		um)
+			continue
+			;;
+		esac
+
+		export_headers
+		grab_syscall_names_from_unistd_h
+
+		case ${arch} in
+		arm)
+			bits=32
+			arch=armoabi extraflags= generate_table
+			arch=arm extraflags=-D__ARM_EABI__ generate_table
+			;;
+		loongarch)
+			# 32-bit variant of loongarch may appear
+			arch=loongarch64 extraflags=-D_LOONGARCH_SZLONG=64 generate_table
+			;;
+		mips)
+			arch=mips64 extraflags=-D_MIPS_SIM=_MIPS_SIM_ABI64 generate_table
+			bits=32
+			arch=mipso32 extraflags=-D_MIPS_SIM=_MIPS_SIM_ABI32 generate_table
+			arch=mips64n32 extraflags=-D_MIPS_SIM=_MIPS_SIM_NABI32 generate_table
+			;;
+		powerpc)
+			generate_table
+			arch=powerpc64 generate_table
+			;;
+		riscv)
+			arch=riscv64 extraflags=-D__LP64__ generate_table
+			bits=32
+			arch=riscv32 extraflags=-D__SIZEOF_POINTER__=4 generate_table
+			;;
+		s390)
+			bits=32
+			generate_table
+			bits=64
+			arch=s390x generate_table
+			;;
+		sparc)
+			bits=32
+			extraflags=-D__32bit_syscall_numbers__ generate_table
+			bits=64
+			arch=sparc64 extraflags=-D__arch64__ generate_table
+			;;
+		x86)
+			arch=x86_64 extraflags=-D__LP64__ generate_table
+			bits=32
+			arch=i386 generate_table
+			arch=x32 extraflags=-D__ILP32__ generate_table
+			;;
+		arc | csky | hexagon | m68k | microblaze | nios2 | openrisc | sh | xtensa)
+			bits=32 generate_table
+			;;
+		*)
+			generate_table
+			;;
+		esac
+	done
+}
+
+copy_supported_arch() {
+	while IFS= read -r arch; do
+		if [ -f "${TEMP}/${arch}.in" ]; then
+			echo "- ${arch}"
+			cp "${TEMP}/${arch}.in" "${SCRIPT_DIR}/${arch}.in"
+		fi
+	done <${SUPPORTED_ARCH}
+}
+
+echo "Temporary directory ${TEMP}"
+echo "Extracting syscalls"
+
+grab_syscall_names_from_tables
+generate_list_syscalls_c
+
+do_all_tables
+
+echo "Copying supported syscalls"
+copy_supported_arch