diff mbox series

[v2] contrib/mklog.py: Improve PR handling (was: Re: git gcc-commit-mklog doesn't extract PR number to ChangeLog)

Message ID 3c66efd4-eb5e-f2bb-6138-4126b5909c9c@codesourcery.com
State New
Headers show
Series [v2] contrib/mklog.py: Improve PR handling (was: Re: git gcc-commit-mklog doesn't extract PR number to ChangeLog) | expand

Commit Message

Tobias Burnus June 21, 2021, 7:54 a.m. UTC
On 17.06.21 02:17, Martin Sebor via Gcc wrote:
> @@ -147,6 +152,12 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
>
>           # Extract PR entries from newly added tests
>           if 'testsuite' in file.path and file.is_added_file:
> +            name = os.path.basename(file.path)
> +            name = os.path.splitext(name)[0]
> +            if name.startswith("pr"):
> +                name = name[2:]
> +                name = "PR " + name
> +                prs.append(name)

I think you need a regular expression to extract the PR – as it will both match
too much and to little. We have file names such as:
* libstdc++-pr91488.C (other prefix)
* PR37039.f90  (capitalized PR)
* pr98218-1.C (suffix with '-')
* pr40724_1.f (suffix with '_')
* pr101023a.C (suffix with a letter)

But otherwise, I like that idea.

  * * *

Changes in my patch compared to v1:
- (From Martin's patch:) Extract the PR from new-files file
   name (using pattern matching), but only take the PR if the
   PR wasn't found in the file as PR comment.
   (The latter happens, e.g., with b376b1ef389.)
- Avoid printing the same PR multiple times as summary line
   (duplicates occur due to 'PR 134' vs. 'PR comp/123' vs.
   'PR othercomp/123') — This does not avoid all issues but at least
   some. If this becomes a real world issue, we can try harder.

OK to commit this one? — Comments?

  * * *

I did leave out other changes as they seem to be less clear cut,
and which can be still be handled as follow up. Like:
- Adding 'Resolves:' (as in some cases it only resolves part of
   the PR)
- ... other changes/patches I missed. (This thread has too many
   emails.) In particular, if
   ^PR <comp>/<pr> - ....
   is accepted by gcc-commit/, then there is no need to list the
   PRs individually later on. But currently, it is still required.

  * * *

Cross ref:
* v1 of my patch was at
   https://gcc.gnu.org/pipermail/gcc/2021-June/236498.html
* Discussion of the -b option is at
   https://gcc.gnu.org/pipermail/gcc/2021-June/236519.html
* Martin S's patch (partially quoted above) is at
   https://gcc.gnu.org/pipermail/gcc/2021-June/236460.html

Tobias

-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf

Comments

Martin Liška June 21, 2021, 8:09 a.m. UTC | #1
Hi.

I see the following warnings:

$ pytest test_mklog.py
FAILED test_mklog.py::TestMklog::test_sorting - AssertionError: assert '\n\tPR 50209...New test.\n\n' == 'gcc/ChangeLo...New test.\n\n'

$ flake8 mklog.py
mklog.py:187:23: Q000 Remove bad quotes

and ...

> contrib/mklog.py: Improve PR handling
> 
> Co-authored-by: Martin Sebor <msebor@redhat.com>
> 
> contrib/ChangeLog:
> 
> 	* mklog.py (bugzilla_url): Fetch also component.
> 	(pr_filename_regex): New.
> 	(get_pr_titles): Update PR string with correct format and component.
> 	(generate_changelog): Take additional PRs; extract PR from the
> 	filename.
> 	(__main__): Add -b/--pr-numbers argument.
> 
>  contrib/mklog.py | 41 ++++++++++++++++++++++++++++++++---------
>  1 file changed, 32 insertions(+), 9 deletions(-)
> 
> diff --git a/contrib/mklog.py b/contrib/mklog.py
> index 1f59055e723..bba6c1a0e1a 100755
> --- a/contrib/mklog.py
> +++ b/contrib/mklog.py
> @@ -42,6 +42,7 @@ pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
>  prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
>  dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
>  dg_regex = re.compile(r'{\s+dg-(error|warning)')
> +pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
>  identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
>  comment_regex = re.compile(r'^\/\*')
>  struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
> @@ -52,7 +53,7 @@ fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
>  template_and_param_regex = re.compile(r'<[^<>]*>')
>  md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
>  bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
> -               'include_fields=summary'
> +               'include_fields=summary,component'
>  
>  function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}
>  
> @@ -118,20 +119,23 @@ def sort_changelog_files(changed_file):
>  
>  
>  def get_pr_titles(prs):
> -    output = ''
> -    for pr in prs:
> +    output = []
> +    for idx, pr in enumerate(prs):
>          pr_id = pr.split('/')[-1]
>          r = requests.get(bugzilla_url % pr_id)
>          bugs = r.json()['bugs']
>          if len(bugs) == 1:
> -            output += '%s - %s\n' % (pr, bugs[0]['summary'])
> -            print(output)
> +            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
> +            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
> +            if out not in output:
> +                output.append(out)
>      if output:
> -        output += '\n'
> -    return output
> +        output.append('')
> +    return '\n'.join(output)
>  
>  
> -def generate_changelog(data, no_functions=False, fill_pr_titles=False):
> +def generate_changelog(data, no_functions=False, fill_pr_titles=False,
> +                       additional_prs=None):
>      changelogs = {}
>      changelog_list = []
>      prs = []
> @@ -139,6 +143,8 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
>      diff = PatchSet(data)
>      global firstpr
>  
> +    if additional_prs:
> +        prs = [pr for pr in additional_prs if pr not in prs]
>      for file in diff:
>          # skip files that can't be parsed
>          if file.path == '/dev/null':
> @@ -154,21 +160,33 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
>              # Only search first ten lines as later lines may
>              # contains commented code which a note that it
>              # has not been tested due to a certain PR or DR.
> +            this_file_prs = []
>              for line in list(file)[0][0:10]:
>                  m = pr_regex.search(line.value)
>                  if m:
>                      pr = m.group('pr')
>                      if pr not in prs:
>                          prs.append(pr)
> +                        this_file_prs.append(pr.split('/')[-1])
>                  else:
>                      m = dr_regex.search(line.value)
>                      if m:
>                          dr = m.group('dr')
>                          if dr not in prs:
>                              prs.append(dr)
> +                            this_file_prs.append(dr.split('/')[-1])
>                      elif dg_regex.search(line.value):
>                          # Found dg-warning/dg-error line
>                          break
> +            # PR number in the file name
> +            fname = os.path.basename(file.path)

This is a dead code.

> +            fname = os.path.splitext(fname)[0]
> +            m = pr_filename_regex.search(fname)
> +            if m:
> +                pr = m.group('pr')
> +                pr2 = "PR " + pr

Bad quotes here.

> +                if pr not in this_file_prs and pr2 not in prs:
> +                    prs.append(pr2)
>  
>      if prs:
>          firstpr = prs[0]
> @@ -286,6 +304,8 @@ if __name__ == '__main__':
>      parser = argparse.ArgumentParser(description=help_message)
>      parser.add_argument('input', nargs='?',
>                          help='Patch file (or missing, read standard input)')
> +    parser.add_argument('-b', '--pr-numbers', action='append',
> +                        help='Add the specified PRs (comma separated)')

Do we really want to support '-b 1 -b 2' and also -b '1,2' formats? Seems to me quite
complicated.

Cheers,
Martin

>      parser.add_argument('-s', '--no-functions', action='store_true',
>                          help='Do not generate function names in ChangeLogs')
>      parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
> @@ -308,8 +328,11 @@ if __name__ == '__main__':
>      if args.update_copyright:
>          update_copyright(data)
>      else:
> +        pr_numbers = args.pr_numbers
> +        if pr_numbers:
> +            pr_numbers = [b for i in args.pr_numbers for b in i.split(',')]
>          output = generate_changelog(data, args.no_functions,
> -                                    args.fill_up_bug_titles)
> +                                    args.fill_up_bug_titles, pr_numbers)
>          if args.changelog:
>              lines = open(args.changelog).read().split('\n')
>              start = list(takewhile(lambda l: not l.startswith('#'), lines))



On 6/21/21 9:54 AM, Tobias Burnus wrote:
> On 17.06.21 02:17, Martin Sebor via Gcc wrote:
>> @@ -147,6 +152,12 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False):
>>
>>           # Extract PR entries from newly added tests
>>           if 'testsuite' in file.path and file.is_added_file:
>> +            name = os.path.basename(file.path)
>> +            name = os.path.splitext(name)[0]
>> +            if name.startswith("pr"):
>> +                name = name[2:]
>> +                name = "PR " + name
>> +                prs.append(name)
> 
> I think you need a regular expression to extract the PR – as it will both match
> too much and to little. We have file names such as:
> * libstdc++-pr91488.C (other prefix)
> * PR37039.f90  (capitalized PR)
> * pr98218-1.C (suffix with '-')
> * pr40724_1.f (suffix with '_')
> * pr101023a.C (suffix with a letter)
> 
> But otherwise, I like that idea.
> 
>   * * *
> 
> Changes in my patch compared to v1:
> - (From Martin's patch:) Extract the PR from new-files file
>    name (using pattern matching), but only take the PR if the
>    PR wasn't found in the file as PR comment.
>    (The latter happens, e.g., with b376b1ef389.)
> - Avoid printing the same PR multiple times as summary line
>    (duplicates occur due to 'PR 134' vs. 'PR comp/123' vs.
>    'PR othercomp/123') — This does not avoid all issues but at least
>    some. If this becomes a real world issue, we can try harder.
> 
> OK to commit this one? — Comments?
> 
>   * * *
> 
> I did leave out other changes as they seem to be less clear cut,
> and which can be still be handled as follow up. Like:
> - Adding 'Resolves:' (as in some cases it only resolves part of
>    the PR)
> - ... other changes/patches I missed. (This thread has too many
>    emails.) In particular, if
>    ^PR <comp>/<pr> - ....
>    is accepted by gcc-commit/, then there is no need to list the
>    PRs individually later on. But currently, it is still required.
> 
>   * * *
> 
> Cross ref:
> * v1 of my patch was at
>    https://gcc.gnu.org/pipermail/gcc/2021-June/236498.html
> * Discussion of the -b option is at
>    https://gcc.gnu.org/pipermail/gcc/2021-June/236519.html
> * Martin S's patch (partially quoted above) is at
>    https://gcc.gnu.org/pipermail/gcc/2021-June/236460.html
> 
> Tobias
> 
> -----------------
> Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
Tobias Burnus June 21, 2021, 8:37 a.m. UTC | #2
On 21.06.21 10:09, Martin Liška wrote:

> $ pytest test_mklog.py
> FAILED test_mklog.py::TestMklog::test_sorting - AssertionError: assert
> '\n\tPR 50209...New test.\n\n' == 'gcc/ChangeLo...New test.\n\n'
Aha, missed that there is indeed a testsuite - nice!
> $ flake8 mklog.py
> mklog.py:187:23: Q000 Remove bad quotes
I have now filled:
https://bugs.launchpad.net/ubuntu/+source/python-pytest-flake8/+bug/1933075

>> +            # PR number in the file name
>> +            fname = os.path.basename(file.path)
>
> This is a dead code.
>
>> + fname = os.path.splitext(fname)[0]
>> +            m = pr_filename_regex.search(fname)
It does not look like dead code to me.
>> + parser.add_argument('-b', '--pr-numbers', action='append',
>> +                        help='Add the specified PRs (comma separated)')
>
> Do we really want to support '-b 1 -b 2' and also -b '1,2' formats?
> Seems to me quite
> complicated.

I don't have a strong opinion. I started with '-b 123,245', believing
that the syntax is fine. But then I realized that without '-p'
specifying multiple '-b' looks better by having multiple '-b' if 'PR
<component>/'  (needed for -p as the string is than taken as is). Thus,
I ended up supporting either variant.

But I also happily drop the ',' support.

Change: One quote change, one test_mklog update.

Tobias

-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
Martin Liška June 21, 2021, 12:53 p.m. UTC | #3
On 6/21/21 10:37 AM, Tobias Burnus wrote:
> On 21.06.21 10:09, Martin Liška wrote:
> 
>> $ pytest test_mklog.py
>> FAILED test_mklog.py::TestMklog::test_sorting - AssertionError: assert
>> '\n\tPR 50209...New test.\n\n' == 'gcc/ChangeLo...New test.\n\n'
> Aha, missed that there is indeed a testsuite - nice!
>> $ flake8 mklog.py
>> mklog.py:187:23: Q000 Remove bad quotes
> I have now filled:
> https://bugs.launchpad.net/ubuntu/+source/python-pytest-flake8/+bug/1933075
> 
>>> +            # PR number in the file name
>>> +            fname = os.path.basename(file.path)
>>
>> This is a dead code.
>>
>>> + fname = os.path.splitext(fname)[0]
>>> +            m = pr_filename_regex.search(fname)
> It does not look like dead code to me.

Hello.

The code is weird as os.path.basename returns:

In [5]: os.path.basename('/tmp/a/b/c.txt')
Out[5]: 'c.txt'

why do you need os.path.splitext(fname) call?


>>> + parser.add_argument('-b', '--pr-numbers', action='append',
>>> +                        help='Add the specified PRs (comma separated)')
>>
>> Do we really want to support '-b 1 -b 2' and also -b '1,2' formats?
>> Seems to me quite
>> complicated.
> 
> I don't have a strong opinion. I started with '-b 123,245', believing
> that the syntax is fine. But then I realized that without '-p'
> specifying multiple '-b' looks better by having multiple '-b' if 'PR
> <component>/'  (needed for -p as the string is than taken as is). Thus,
> I ended up supporting either variant.

I would start with -b 1,2,3,4 syntax. It will be likely easier for git alias integration.

Martin

> 
> But I also happily drop the ',' support.
> 
> Change: One quote change, one test_mklog update.
> 
> Tobias
> 
> -----------------
> Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
Tobias Burnus June 21, 2021, 1:26 p.m. UTC | #4
Now committed as r12-1700-gedf0c3ffb59d75c11e05bc722432dc554e275c72 / as
attached.

(We had some follow-up discussion on IRC.)

On 21.06.21 14:53, Martin Liška wrote:
>>>> +            # PR number in the file name
>>>> +            fname = os.path.basename(file.path)
>>>
>>> This is a dead code.
>>>
>>>> + fname = os.path.splitext(fname)[0]
>>>> +            m = pr_filename_regex.search(fname)
(Meant was the 'splitext' line - it is dead code as the re.search globs
all digits after 'pr' and then stops, ignoring the rest, including file
extensions. – I first thought it referred to the basename line, which
confused me.)
>>>> + parser.add_argument('-b', '--pr-numbers', action='append',
>>>> +                        help='Add the specified PRs (comma
>>>> separated)')
>>>
>>> Do we really want to support '-b 1 -b 2' and also -b '1,2' formats?
>>> Seems to me quite
>>> complicated.
> [...]
> I would start with -b 1,2,3,4 syntax. It will be likely easier for git
> alias integration.

Done so.

I note that argparse permits '-d dir1 -d dir2 -b bug1 -b bug2' which
then only keeps the dir2 as directory and bug2 as bug without printing
an error (or warning) for ignoring dir1 and bug1.

Tobias

-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
diff mbox series

Patch

contrib/mklog.py: Improve PR handling

Co-authored-by: Martin Sebor <msebor@redhat.com>

contrib/ChangeLog:

	* mklog.py (bugzilla_url): Fetch also component.
	(pr_filename_regex): New.
	(get_pr_titles): Update PR string with correct format and component.
	(generate_changelog): Take additional PRs; extract PR from the
	filename.
	(__main__): Add -b/--pr-numbers argument.

 contrib/mklog.py | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/contrib/mklog.py b/contrib/mklog.py
index 1f59055e723..bba6c1a0e1a 100755
--- a/contrib/mklog.py
+++ b/contrib/mklog.py
@@ -42,6 +42,7 @@  pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
 prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
 dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
 dg_regex = re.compile(r'{\s+dg-(error|warning)')
+pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
 identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
 comment_regex = re.compile(r'^\/\*')
 struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
@@ -52,7 +53,7 @@  fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
 template_and_param_regex = re.compile(r'<[^<>]*>')
 md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
 bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
-               'include_fields=summary'
+               'include_fields=summary,component'
 
 function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}
 
@@ -118,20 +119,23 @@  def sort_changelog_files(changed_file):
 
 
 def get_pr_titles(prs):
-    output = ''
-    for pr in prs:
+    output = []
+    for idx, pr in enumerate(prs):
         pr_id = pr.split('/')[-1]
         r = requests.get(bugzilla_url % pr_id)
         bugs = r.json()['bugs']
         if len(bugs) == 1:
-            output += '%s - %s\n' % (pr, bugs[0]['summary'])
-            print(output)
+            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
+            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
+            if out not in output:
+                output.append(out)
     if output:
-        output += '\n'
-    return output
+        output.append('')
+    return '\n'.join(output)
 
 
-def generate_changelog(data, no_functions=False, fill_pr_titles=False):
+def generate_changelog(data, no_functions=False, fill_pr_titles=False,
+                       additional_prs=None):
     changelogs = {}
     changelog_list = []
     prs = []
@@ -139,6 +143,8 @@  def generate_changelog(data, no_functions=False, fill_pr_titles=False):
     diff = PatchSet(data)
     global firstpr
 
+    if additional_prs:
+        prs = [pr for pr in additional_prs if pr not in prs]
     for file in diff:
         # skip files that can't be parsed
         if file.path == '/dev/null':
@@ -154,21 +160,33 @@  def generate_changelog(data, no_functions=False, fill_pr_titles=False):
             # Only search first ten lines as later lines may
             # contains commented code which a note that it
             # has not been tested due to a certain PR or DR.
+            this_file_prs = []
             for line in list(file)[0][0:10]:
                 m = pr_regex.search(line.value)
                 if m:
                     pr = m.group('pr')
                     if pr not in prs:
                         prs.append(pr)
+                        this_file_prs.append(pr.split('/')[-1])
                 else:
                     m = dr_regex.search(line.value)
                     if m:
                         dr = m.group('dr')
                         if dr not in prs:
                             prs.append(dr)
+                            this_file_prs.append(dr.split('/')[-1])
                     elif dg_regex.search(line.value):
                         # Found dg-warning/dg-error line
                         break
+            # PR number in the file name
+            fname = os.path.basename(file.path)
+            fname = os.path.splitext(fname)[0]
+            m = pr_filename_regex.search(fname)
+            if m:
+                pr = m.group('pr')
+                pr2 = "PR " + pr
+                if pr not in this_file_prs and pr2 not in prs:
+                    prs.append(pr2)
 
     if prs:
         firstpr = prs[0]
@@ -286,6 +304,8 @@  if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=help_message)
     parser.add_argument('input', nargs='?',
                         help='Patch file (or missing, read standard input)')
+    parser.add_argument('-b', '--pr-numbers', action='append',
+                        help='Add the specified PRs (comma separated)')
     parser.add_argument('-s', '--no-functions', action='store_true',
                         help='Do not generate function names in ChangeLogs')
     parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
@@ -308,8 +328,11 @@  if __name__ == '__main__':
     if args.update_copyright:
         update_copyright(data)
     else:
+        pr_numbers = args.pr_numbers
+        if pr_numbers:
+            pr_numbers = [b for i in args.pr_numbers for b in i.split(',')]
         output = generate_changelog(data, args.no_functions,
-                                    args.fill_up_bug_titles)
+                                    args.fill_up_bug_titles, pr_numbers)
         if args.changelog:
             lines = open(args.changelog).read().split('\n')
             start = list(takewhile(lambda l: not l.startswith('#'), lines))