Message ID | 20210831000822.1891027-2-siddhesh@sourceware.org |
---|---|
State | New |
Headers | show |
Series | Source attribution cleanups | expand |
On 8/30/21 8:08 PM, Siddhesh Poyarekar wrote: > Since the shared code now has special status with respect to > copyrights, port them into a more structured format in the source tree > and add a python function that parses and returns a dictionary with > the information. > > I need this to exclude these files from the Contributed-by changes and > I reckon it would be useful to know these files for future tooling. LGTM. Reviewed-by: Carlos O'Donell <carlos@redhat.com> > --- > SHARED-FILES | 207 +++++++++++++++++++++++++++++++++++ > scripts/glibc_shared_code.py | 70 ++++++++++++ > 2 files changed, 277 insertions(+) > create mode 100644 SHARED-FILES > create mode 100644 scripts/glibc_shared_code.py > > diff --git a/SHARED-FILES b/SHARED-FILES > new file mode 100644 > index 0000000000..38f189727c > --- /dev/null > +++ b/SHARED-FILES > @@ -0,0 +1,207 @@ > +# Files shared with other projects. Pass a file path to the > +# get_glibc_shared_code() function in the python library > +# scripts/glibc_shared_code.py to get a dict object with this information. See > +# the library sources for more information. > + > +# The headers on most of these files indicate that glibc is the canonical > +# source for these files, although in many cases there seem to be useful > +# changes in the gnulib versions that could be merged back in. Not all gnulib > +# files contain such a header and it is not always consistent in its format, so > +# it would be useful to make sure that all gnulib files that are using glibc as > +# upstream have a greppable header. > +# > +# These files are quite hard to find without a header to grep for and each file > +# has to be compared manually so this list is likely incomplete or may contain > +# errors. 
> +gnulib: > + argp/argp-ba.c > + argp/argp-ba.c > + argp/argp-eexst.c > + argp/argp-fmtstream.c > + argp/argp-fmtstream.h > + argp/argp-fs-xinl.c > + argp/argp-help.c > + argp/argp-namefrob.h > + argp/argp-parse.c > + argp/argp-pv.c > + argp/argp-pvh.c > + argp/argp-xinl.c > + argp/argp.h > + crypt/md5.c > + crypt/md5.h > + dirent/alphasort.c > + dirent/scandir.c > + locale/programs/3level.h > + # Merged from gnulib 2014-6-23 > + malloc/obstack.c > + # Merged from gnulib 2014-6-23 > + malloc/obstack.h > + # Merged from gnulib 2014-07-10 > + misc/error.c > + misc/error.h > + misc/getpass.c > + misc/mkdtemp.c > + posix/fnmatch_loop.c > + # Intended to be the same. Gnulib copy contains glibc changes. > + posix/getopt.c > + # Intended to be the same. Gnulib copy contains glibc changes. > + posix/getopt1.c > + # Intended to be the same. Gnulib copy contains glibc changes. > + posix/getopt_int.h > + posix/glob.c > + posix/regcomp.c > + posix/regex.c > + posix/regex.h > + posix/regex_internal.c > + posix/regex_internal.h > + posix/regexec.c > + posix/spawn.c > + posix/spawn_faction_addclose.c > + posix/spawn_faction_adddup2.c > + posix/spawn_faction_addopen.c > + posix/spawn_faction_destroy.c > + posix/spawn_faction_init.c > + posix/spawn_int.h > + posix/spawnattr_destroy.c > + posix/spawnattr_getdefault.c > + posix/spawnattr_getflags.c > + posix/spawnattr_getpgroup.c > + posix/spawnattr_getschedparam.c > + posix/spawnattr_getschedpolicy.c > + posix/spawnattr_getsigmask.c > + posix/spawnattr_init.c > + posix/spawnattr_setdefault.c > + posix/spawnattr_setflags.c > + posix/spawnattr_setpgroup.c > + posix/spawnattr_setschedparam.c > + posix/spawnattr_setschedpolicy.c > + posix/spawnattr_setsigmask.c > + posix/spawnp.c > + stdlib/atoll.c > + stdlib/getsubopt.c > + stdlib/setenv.c > + stdlib/strtoll.c > + stdlib/strtoul.c > + # Merged from gnulib 2014-6-26, needs merge back > + string/memchr.c > + string/memcmp.c > + string/memmem.c > + string/mempcpy.c > + string/memrchr.c > 
+ string/rawmemchr.c > + string/stpcpy.c > + string/stpncpy.c > + string/str-two-way.h > + string/strcasestr.c > + string/strcspn.c > + string/strdup.c > + string/strndup.c > + string/strpbrk.c > + string/strsignal.c > + string/strstr.c > + string/strtok_r.c > + string/strverscmp.c > + sysdeps/generic/pty-private.h > + sysdeps/generic/siglist.h > + sysdeps/posix/euidaccess.c > + sysdeps/posix/gai_strerror.c > + sysdeps/posix/getcwd.c > + sysdeps/posix/pwrite.c > + sysdeps/posix/spawni.c > + # Merged from gnulib 2014-6-23 > + sysdeps/posix/tempname.c > + # Merged from gnulib 2014-6-27 > + time/mktime.c > + time/mktime-internal.h > + time/strptime.c > + time/timegm.c > + > +# The last merge was 2014-12-11 and merged gettext 0.19.3 into glibc with a > +# patch submitted to the gettext mailing list for changes that could be merged > +# back. > +# > +# This commit was omitted from the merge as it does not appear to be compatible > +# with how glibc expects things to work: > +# > +# commit 279b57fc367251666f00e8e2b599b83703451afb > +# Author: Bruno Haible <bruno@clisp.org> > +# Date: Fri Jun 14 12:03:49 2002 +0000 > +# > +# Make absolute pathnames inside $LANGUAGE work. > +gettext: > + intl/bindtextdom.c > + intl/dcgettext.c > + intl/dcigettext.c > + intl/dcngettext.c > + intl/dgettext.c > + intl/dngettext.c > + intl/explodename.c > + intl/finddomain.c > + intl/gettext.c > + intl/gettextP.h > + intl/gmo.h > + intl/hash-string.c > + intl/hash-string.h > + intl/l10nflist.c > + intl/loadinfo.h > + intl/loadmsgcat.c > + intl/locale.alias > + intl/localealias.c > + intl/ngettext.c > + intl/plural-exp.c > + intl/plural-exp.h > + intl/plural.y > + intl/textdomain.c > + > +# The following files are shared with the upstream Unicode project and must be > +# updated regularly to stay in sync with the upstream unicode releases. > +# > +# Merged from Unicode 13.0.0 release. 
> +unicode: > + localedata/unicode-gen/UnicodeData.txt > + localedata/unicode-gen/unicode-license.txt > + localedata/unicode-gen/DerivedCoreProperties.txt > + localedata/unicode-gen/EastAsianWidth.txt > + localedata/unicode-gen/PropList.txt > + > +# The following files are shared with the upstream tzcode project and must be > +# updated regularly to stay in sync with the upstream releases. > +# > +# Update from tzcode 2017b. > +# Latest is 2018g: > +# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html > +tzcode: > + timezone/private.h > + timezone/tzfile.h > + timezone/zdump.c > + timezone/zic.c > + timezone/tzselect.ksh > + > +# The following files are shared with the upstream tzdata project but are not > +# synchronized regularly. The data files themselves are used only for testing > +# purposes and their data is never used to generate any output. We synchronize > +# them only to stay on top of newer data that might help with testing. > +# > +# Currently synced to 2009i. Latest is 2018g. > +# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html > +tzdata: > + timezone/africa > + timezone/antarctica > + timezone/asia > + timezone/australasia > + timezone/europe > + timezone/northamerica > + timezone/southamerica > + timezone/pacificnew > + timezone/etcetera > + timezone/factory > + timezone/backward > + timezone/systemv > + timezone/solar87 > + timezone/solar88 > + timezone/solar89 > + timezone/iso3166.tab > + timezone/zone.tab > + timezone/leapseconds > + # This is yearistype.sh in the parent project > + timezone/yearistype > diff --git a/scripts/glibc_shared_code.py b/scripts/glibc_shared_code.py > new file mode 100644 > index 0000000000..873a26117f > --- /dev/null > +++ b/scripts/glibc_shared_code.py > @@ -0,0 +1,70 @@ > +#!/usr/bin/python > +# Copyright (C) 2021 Free Software Foundation, Inc. > +# This file is part of the GNU C Library. 
> +# > +# The GNU C Library is free software; you can redistribute it and/or > +# modify it under the terms of the GNU Lesser General Public > +# License as published by the Free Software Foundation; either > +# version 2.1 of the License, or (at your option) any later version. > +# > +# The GNU C Library is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# Lesser General Public License for more details. > +# > +# You should have received a copy of the GNU Lesser General Public > +# License along with the GNU C Library; if not, see > +# <https://www.gnu.org/licenses/>. > + > +def get_glibc_shared_code(path): > + """ Get glibc shared code information from a file > + > + The input file must have project names in their own line ending with a colon > + and all shared files in the project on their own lines following the project > + name. Whitespaces are ignored. Lines with # as the first non-whitespace > + character are ignored. > + > + Args: > + path: The path to file containing shared code information. > + > + Returns: > + A dictionary with project names as key and lists of files as values. > + """ > + > + projects = {} > + with open(path, 'r') as f: > + for line in f.readlines(): > + line = line.strip() > + if len(line) == 0 or line[0] == '#': > + continue > + if line[-1] == ':': > + cur = line[:-1] > + projects[cur] = [] > + else: > + projects[cur].append(line) > + > + return projects > + > +# Function testing. 
> +import sys > +from os import EX_NOINPUT > +from os.path import exists > +from pprint import * > + > +if __name__ == '__main__': > + if len(sys.argv) != 2: > + print('Usage: %s <file name>' % sys.argv[0]) > + print('Run this script from the base glibc source directory') > + sys.exit(EX_NOINPUT) > + > + print('Testing get_glibc_shared_code with %s:\n' % sys.argv[1]) > + r = get_glibc_shared_code(sys.argv[1]) > + errors = False > + for k in r.keys(): > + for f in r[k]: > + if not exists(f): > + print('%s does not exist' % f) > + errors = True > + > + if not errors: > + pprint(r) >
diff --git a/SHARED-FILES b/SHARED-FILES new file mode 100644 index 0000000000..38f189727c --- /dev/null +++ b/SHARED-FILES @@ -0,0 +1,207 @@ +# Files shared with other projects. Pass a file path to the +# get_glibc_shared_code() function in the python library +# scripts/glibc_shared_code.py to get a dict object with this information. See +# the library sources for more information. + +# The headers on most of these files indicate that glibc is the canonical +# source for these files, although in many cases there seem to be useful +# changes in the gnulib versions that could be merged back in. Not all gnulib +# files contain such a header and it is not always consistent in its format, so +# it would be useful to make sure that all gnulib files that are using glibc as +# upstream have a greppable header. +# +# These files are quite hard to find without a header to grep for and each file +# has to be compared manually so this list is likely incomplete or may contain +# errors. +gnulib: + argp/argp-ba.c + argp/argp-ba.c + argp/argp-eexst.c + argp/argp-fmtstream.c + argp/argp-fmtstream.h + argp/argp-fs-xinl.c + argp/argp-help.c + argp/argp-namefrob.h + argp/argp-parse.c + argp/argp-pv.c + argp/argp-pvh.c + argp/argp-xinl.c + argp/argp.h + crypt/md5.c + crypt/md5.h + dirent/alphasort.c + dirent/scandir.c + locale/programs/3level.h + # Merged from gnulib 2014-6-23 + malloc/obstack.c + # Merged from gnulib 2014-6-23 + malloc/obstack.h + # Merged from gnulib 2014-07-10 + misc/error.c + misc/error.h + misc/getpass.c + misc/mkdtemp.c + posix/fnmatch_loop.c + # Intended to be the same. Gnulib copy contains glibc changes. + posix/getopt.c + # Intended to be the same. Gnulib copy contains glibc changes. + posix/getopt1.c + # Intended to be the same. Gnulib copy contains glibc changes. 
+ posix/getopt_int.h + posix/glob.c + posix/regcomp.c + posix/regex.c + posix/regex.h + posix/regex_internal.c + posix/regex_internal.h + posix/regexec.c + posix/spawn.c + posix/spawn_faction_addclose.c + posix/spawn_faction_adddup2.c + posix/spawn_faction_addopen.c + posix/spawn_faction_destroy.c + posix/spawn_faction_init.c + posix/spawn_int.h + posix/spawnattr_destroy.c + posix/spawnattr_getdefault.c + posix/spawnattr_getflags.c + posix/spawnattr_getpgroup.c + posix/spawnattr_getschedparam.c + posix/spawnattr_getschedpolicy.c + posix/spawnattr_getsigmask.c + posix/spawnattr_init.c + posix/spawnattr_setdefault.c + posix/spawnattr_setflags.c + posix/spawnattr_setpgroup.c + posix/spawnattr_setschedparam.c + posix/spawnattr_setschedpolicy.c + posix/spawnattr_setsigmask.c + posix/spawnp.c + stdlib/atoll.c + stdlib/getsubopt.c + stdlib/setenv.c + stdlib/strtoll.c + stdlib/strtoul.c + # Merged from gnulib 2014-6-26, needs merge back + string/memchr.c + string/memcmp.c + string/memmem.c + string/mempcpy.c + string/memrchr.c + string/rawmemchr.c + string/stpcpy.c + string/stpncpy.c + string/str-two-way.h + string/strcasestr.c + string/strcspn.c + string/strdup.c + string/strndup.c + string/strpbrk.c + string/strsignal.c + string/strstr.c + string/strtok_r.c + string/strverscmp.c + sysdeps/generic/pty-private.h + sysdeps/generic/siglist.h + sysdeps/posix/euidaccess.c + sysdeps/posix/gai_strerror.c + sysdeps/posix/getcwd.c + sysdeps/posix/pwrite.c + sysdeps/posix/spawni.c + # Merged from gnulib 2014-6-23 + sysdeps/posix/tempname.c + # Merged from gnulib 2014-6-27 + time/mktime.c + time/mktime-internal.h + time/strptime.c + time/timegm.c + +# The last merge was 2014-12-11 and merged gettext 0.19.3 into glibc with a +# patch submitted to the gettext mailing list for changes that could be merged +# back. 
+# +# This commit was omitted from the merge as it does not appear to be compatible +# with how glibc expects things to work: +# +# commit 279b57fc367251666f00e8e2b599b83703451afb +# Author: Bruno Haible <bruno@clisp.org> +# Date: Fri Jun 14 12:03:49 2002 +0000 +# +# Make absolute pathnames inside $LANGUAGE work. +gettext: + intl/bindtextdom.c + intl/dcgettext.c + intl/dcigettext.c + intl/dcngettext.c + intl/dgettext.c + intl/dngettext.c + intl/explodename.c + intl/finddomain.c + intl/gettext.c + intl/gettextP.h + intl/gmo.h + intl/hash-string.c + intl/hash-string.h + intl/l10nflist.c + intl/loadinfo.h + intl/loadmsgcat.c + intl/locale.alias + intl/localealias.c + intl/ngettext.c + intl/plural-exp.c + intl/plural-exp.h + intl/plural.y + intl/textdomain.c + +# The following files are shared with the upstream Unicode project and must be +# updated regularly to stay in sync with the upstream unicode releases. +# +# Merged from Unicode 13.0.0 release. +unicode: + localedata/unicode-gen/UnicodeData.txt + localedata/unicode-gen/unicode-license.txt + localedata/unicode-gen/DerivedCoreProperties.txt + localedata/unicode-gen/EastAsianWidth.txt + localedata/unicode-gen/PropList.txt + +# The following files are shared with the upstream tzcode project and must be +# updated regularly to stay in sync with the upstream releases. +# +# Update from tzcode 2017b. +# Latest is 2018g: +# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html +tzcode: + timezone/private.h + timezone/tzfile.h + timezone/zdump.c + timezone/zic.c + timezone/tzselect.ksh + +# The following files are shared with the upstream tzdata project but are not +# synchronized regularly. The data files themselves are used only for testing +# purposes and their data is never used to generate any output. We synchronize +# them only to stay on top of newer data that might help with testing. +# +# Currently synced to 2009i. Latest is 2018g. 
+# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html +tzdata: + timezone/africa + timezone/antarctica + timezone/asia + timezone/australasia + timezone/europe + timezone/northamerica + timezone/southamerica + timezone/pacificnew + timezone/etcetera + timezone/factory + timezone/backward + timezone/systemv + timezone/solar87 + timezone/solar88 + timezone/solar89 + timezone/iso3166.tab + timezone/zone.tab + timezone/leapseconds + # This is yearistype.sh in the parent project + timezone/yearistype diff --git a/scripts/glibc_shared_code.py b/scripts/glibc_shared_code.py new file mode 100644 index 0000000000..873a26117f --- /dev/null +++ b/scripts/glibc_shared_code.py @@ -0,0 +1,70 @@ +#!/usr/bin/python +# Copyright (C) 2021 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. + +def get_glibc_shared_code(path): + """ Get glibc shared code information from a file + + The input file must have project names in their own line ending with a colon + and all shared files in the project on their own lines following the project + name. Whitespaces are ignored. Lines with # as the first non-whitespace + character are ignored. + + Args: + path: The path to file containing shared code information. 
+ + Returns: + A dictionary with project names as key and lists of files as values. + """ + + projects = {} + with open(path, 'r') as f: + for line in f.readlines(): + line = line.strip() + if len(line) == 0 or line[0] == '#': + continue + if line[-1] == ':': + cur = line[:-1] + projects[cur] = [] + else: + projects[cur].append(line) + + return projects + +# Function testing. +import sys +from os import EX_NOINPUT +from os.path import exists +from pprint import * + +if __name__ == '__main__': + if len(sys.argv) != 2: + print('Usage: %s <file name>' % sys.argv[0]) + print('Run this script from the base glibc source directory') + sys.exit(EX_NOINPUT) + + print('Testing get_glibc_shared_code with %s:\n' % sys.argv[1]) + r = get_glibc_shared_code(sys.argv[1]) + errors = False + for k in r.keys(): + for f in r[k]: + if not exists(f): + print('%s does not exist' % f) + errors = True + + if not errors: + pprint(r)