diff mbox series

[gitdm,1/1] Convert to Python 3

Message ID 20230607055754.51065-1-heinrich.schuchardt@canonical.com
State Deferred
Delegated to: Tom Rini
Headers show
Series [gitdm,1/1] Convert to Python 3 | expand

Commit Message

Heinrich Schuchardt June 7, 2023, 5:57 a.m. UTC
This is the result of running 2to3 and manually converting the comparison
functions to lambda expressions.

Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
---
 ConfigFile.py |   4 +-
 csvdump.py    |   4 +-
 database.py   |  22 +++++------
 gitdm         |  27 ++++++-------
 gitlog.py     |   8 ++--
 logparser.py  |   6 +--
 reports.py    | 106 +++++++++++++++-----------------------------------
 utils.py      |   4 +-
 8 files changed, 70 insertions(+), 111 deletions(-)

Comments

Heinrich Schuchardt June 7, 2023, 6:04 a.m. UTC | #1
> This is the result of running 2to3 and manually converting the comparison
> functions to lambda expressions.
> 
> Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>

Hello Tom,

Which branch do you use for the gitdm U-Boot stats? Is it
master or u-boot? If it is u-boot, please update GitLab to make u-boot
the main branch.

The current patch applies to the master branch.

Best regards

Heinrich

> ---
>   ConfigFile.py |   4 +-
>   csvdump.py    |   4 +-
>   database.py   |  22 +++++------
>   gitdm         |  27 ++++++-------
>   gitlog.py     |   8 ++--
>   logparser.py  |   6 +--
>   reports.py    | 106 +++++++++++++++-----------------------------------
>   utils.py      |   4 +-
>   8 files changed, 70 insertions(+), 111 deletions(-)
> 
> diff --git a/ConfigFile.py b/ConfigFile.py
> index 3a1e208..e58c601 100644
> --- a/ConfigFile.py
> +++ b/ConfigFile.py
> @@ -147,10 +147,10 @@ def ReadFileType (filename):
>           m = regex_file_type.match (line)
>           if not m or len (m.groups ()) != 2:
>               ConfigFile.croak ('Funky file type line "%s"' % (line))
> -        if not patterns.has_key (m.group (1)):
> +        if m.group (1) not in patterns:
>               patterns[m.group (1)] = []
>           if m.group (1) not in order:
> -            print '%s not found, appended to the last order' % m.group (1)
> +            print('%s not found, appended to the last order' % m.group (1))
>               order.append (m.group (1))
>   
>           patterns[m.group (1)].append (re.compile (m.group (2), re.IGNORECASE))
> diff --git a/csvdump.py b/csvdump.py
> index 9d1a65e..55e38a0 100644
> --- a/csvdump.py
> +++ b/csvdump.py
> @@ -50,7 +50,7 @@ def store_patch(patch):
>           ChangeSets.append([patch.commit, str(patch.date),
>                              patch.email, domain, author, employer,
>                              patch.added, patch.removed])
> -        for (filetype, (added, removed)) in patch.filetypes.iteritems():
> +        for (filetype, (added, removed)) in patch.filetypes.items():
>               FileTypes.append([patch.commit, filetype, added, removed])
>   
>   
> @@ -82,7 +82,7 @@ def OutputCSV (file):
>       writer = csv.writer (file, quoting=csv.QUOTE_NONNUMERIC)
>       writer.writerow (['Name', 'Email', 'Affliation', 'Date',
>                         'Added', 'Removed', 'Changesets'])
> -    for date, stat in PeriodCommitHash.items():
> +    for date, stat in list(PeriodCommitHash.items()):
>           # sanitise names " is common and \" sometimes too
>           empl_name = stat.employer.name.replace ('"', '.').replace ('\\', '.')
>           author_name = stat.name.replace ('"', '.').replace ('\\', '.')
> diff --git a/database.py b/database.py
> index bf13227..b267ed5 100644
> --- a/database.py
> +++ b/database.py
> @@ -40,7 +40,7 @@ class Hacker:
>                   for edate, empl in self.employer[i]:
>                       if edate > date:
>                           return empl
> -        print 'OOPS.  ', self.name, self.employer, self.email, email, date
> +        print('OOPS.  ', self.name, self.employer, self.email, email, date)
>           return None # Should not happen
>   
>       def addpatch (self, patch):
> @@ -124,11 +124,11 @@ def LookupStoreHacker(name, email, mapunknown = True):
>   
>   
>   def AllHackers ():
> -    return HackersByID.values ()
> +    return list(HackersByID.values ())
>   
>   def DumpDB ():
>       out = open ('database.dump', 'w')
> -    names = HackersByName.keys ()
> +    names = list(HackersByName.keys ())
>       names.sort ()
>       for name in names:
>           h = HackersByName[name]
> @@ -149,7 +149,7 @@ def DumpDB ():
>   # push it backward through the changes we've already seen.
>   #
>   def ApplyFirstTag (tag):
> -    for n in HackersByName.keys ():
> +    for n in list(HackersByName.keys ()):
>           if HackersByName[n].versions:
>               HackersByName[n].versions = [tag]
>   
> @@ -185,7 +185,7 @@ def GetEmployer (name):
>           return e
>   
>   def AllEmployers ():
> -    return Employers.values ()
> +    return list(Employers.values ())
>   
>   #
>   # Certain obnoxious developers, who will remain nameless (because we
> @@ -215,8 +215,8 @@ class VirtualEmployer (Employer):
>           self.__init__ (name) # Reset counts just in case
>   
>       def store (self):
> -        if Employers.has_key (self.name):
> -            print Employers[self.name]
> +        if self.name in Employers:
> +            print(Employers[self.name])
>               sys.stderr.write ('WARNING: Virtual empl %s overwrites another\n'
>                                 % (self.name))
>           if len (self.splits) == 0:
> @@ -235,7 +235,7 @@ class FileType:
>           order = order or self.order
>   
>           for file_type in order:
> -            if patterns.has_key (file_type):
> +            if file_type in patterns:
>                   for patt in patterns[file_type]:
>                       if patt.search (filename):
>                           return file_type
> @@ -261,7 +261,7 @@ def MixVirtuals ():
>   EmailAliases = { }
>   
>   def AddEmailAlias (variant, canonical):
> -    if EmailAliases.has_key (variant):
> +    if variant in EmailAliases:
>           sys.stderr.write ('Duplicate email alias for %s\n' % (variant))
>       EmailAliases[variant] = canonical
>   
> @@ -288,7 +288,7 @@ def AddEmailEmployerMapping (email, employer, end = nextyear):
>           for i in range (0, len(l)):
>               date, xempl = l[i]
>               if date == end:  # probably both nextyear
> -                print 'WARNING: duplicate email/empl for %s' % (email)
> +                print('WARNING: duplicate email/empl for %s' % (email))
>               if date > end:
>                   l.insert (i, (end, empl))
>                   return
> @@ -305,7 +305,7 @@ def MapToEmployer (email, unknown = 0):
>           pass
>       namedom = email.split ('@')
>       if len (namedom) < 2:
> -        print 'Oops...funky email %s' % email
> +        print('Oops...funky email %s' % email)
>           return [(nextyear, GetEmployer ('Funky'))]
>       s = namedom[1].split ('.')
>       for dots in range (len (s) - 2, -1, -1):
> diff --git a/gitdm b/gitdm
> index 61318ad..f426cc7 100755
> --- a/gitdm
> +++ b/gitdm
> @@ -1,4 +1,4 @@
> -#!/usr/bin/pypy
> +#!/usr/bin/python3
>   #-*- coding:utf-8 -*-
>   #
>   
> @@ -15,7 +15,8 @@
>   
>   import database, csvdump, ConfigFile, reports
>   import getopt, datetime
> -import os, re, sys, rfc822, string, os.path
> +from email.utils import parsedate_tz
> +import os, re, sys, string, os.path
>   import logparser
>   from patterns import patterns
>   
> @@ -108,7 +109,7 @@ def ParseOpts():
>           elif opt[0] == '-p':
>               CSVPrefix = opt[1]
>           elif opt[0] == '-r':
> -            print 'Filter on "%s"' % (opt[1])
> +            print('Filter on "%s"' % (opt[1]))
>               FileFilter = re.compile(opt[1])
>           elif opt[0] == '-s':
>               AuthorSOBs = 0
> @@ -120,7 +121,7 @@ def ParseOpts():
>               ReportUnknowns = True
>           elif opt[0] == '-x':
>               CSVFile = open(opt[1], 'w')
> -            print "open output file " + opt[1] + "\n"
> +            print("open output file " + opt[1] + "\n")
>           elif opt [0] == '-w':
>               Aggregate = 'week'
>           elif opt [0] == '-y':
> @@ -172,7 +173,7 @@ DateMap = { }
>   
>   def AddDateLines(date, lines):
>       if lines > 1000000:
> -        print 'Skip big patch (%d)' % lines
> +        print('Skip big patch (%d)' % lines)
>           return
>       try:
>           DateMap[date] += lines
> @@ -180,7 +181,7 @@ def AddDateLines(date, lines):
>           DateMap[date] = lines
>   
>   def PrintDateStats():
> -    dates = DateMap.keys()
> +    dates = list(DateMap.keys())
>       dates.sort()
>       total = 0
>       datef = open('datelc.csv', 'w')
> @@ -195,7 +196,7 @@ def PrintDateStats():
>   # Let's slowly try to move some smarts into this class.
>   #
>   class patch:
> -    (ADDED, REMOVED) = range(2)
> +    (ADDED, REMOVED) = list(range(2))
>   
>       def __init__(self, commit):
>           self.commit = commit
> @@ -219,7 +220,7 @@ class patch:
>           self.reports.append(reporter)
>   
>       def addfiletype(self, filetype, added, removed):
> -        if self.filetypes.has_key(filetype):
> +        if filetype in self.filetypes:
>               self.filetypes[filetype][self.ADDED] += added
>               self.filetypes[filetype][self.REMOVED] += removed
>           else:
> @@ -330,7 +331,7 @@ def grabpatch(logpatch):
>           #
>           m = patterns['date'].match(Line)
>           if m:
> -            dt = rfc822.parsedate(m.group(2))
> +            dt = parsedate_tz(m.group(2))
>               p.date = datetime.date(dt[0], dt[1], dt[2])
>               if p.date > Today:
>                   sys.stderr.write('Funky date: %s\n' % p.date)
> @@ -389,7 +390,7 @@ def GripeAboutAuthorName(name):
>       if name in GripedAuthorNames:
>           return
>       GripedAuthorNames.append(name)
> -    print '%s is an author name, probably not what you want' % (name)
> +    print('%s is an author name, probably not what you want' % (name))
>   
>   def ApplyFileFilter(line, ignore):
>       #
> @@ -462,14 +463,14 @@ TotalChanged = TotalAdded = TotalRemoved = 0
>   #
>   # Snarf changesets.
>   #
> -print >> sys.stderr, 'Grabbing changesets...\r',
> +print('Grabbing changesets...\r', end=' ', file=sys.stderr)
>   
>   patches = logparser.LogPatchSplitter(sys.stdin)
>   printcount = CSCount = 0
>   
>   for logpatch in patches:
>       if (printcount % 50) == 0:
> -        print >> sys.stderr, 'Grabbing changesets...%d\r' % printcount,
> +        print('Grabbing changesets...%d\r' % printcount, end=' ', file=sys.stderr)
>       printcount += 1
>   
>       # We want to ignore commits on svn tags since in Subversion
> @@ -528,7 +529,7 @@ for logpatch in patches:
>           CSCount += 1
>       csvdump.AccumulatePatch(p, Aggregate)
>       csvdump.store_patch(p)
> -print >> sys.stderr, 'Grabbing changesets...done       '
> +print('Grabbing changesets...done       ', file=sys.stderr)
>   
>   if DumpDB:
>       database.DumpDB()
> diff --git a/gitlog.py b/gitlog.py
> index 71efee1..4b1c5d6 100644
> --- a/gitlog.py
> +++ b/gitlog.py
> @@ -61,7 +61,7 @@ S_DONE = 5
>   def get_header(patch, line, input):
>       if line == '':
>           if patch.author == '':
> -            print 'Funky auth line in', patch.commit
> +            print('Funky auth line in', patch.commit)
>               patch.author = database.LookupStoreHacker('Unknown',
>                                                         'unknown@hacker.net')
>           return S_DESC
> @@ -78,7 +78,7 @@ def get_header(patch, line, input):
>   
>   def get_desc(patch, line, input):
>       if not line:
> -        print 'Missing desc in', patch.commit
> +        print('Missing desc in', patch.commit)
>           return S_CHANGELOG
>       patch.desc = line
>       line = getline(input)
> @@ -188,7 +188,7 @@ def grabpatch(input):
>           return None
>       m = patterns['commit'].match(line)
>       if not m:
> -        print 'noncommit', line
> +        print('noncommit', line)
>           return None
>       p = patch(m.group(1))
>       state = S_HEADER
> @@ -199,7 +199,7 @@ def grabpatch(input):
>           line = getline(input)
>           if line is None:
>               if state != S_NUMSTAT:
> -                print 'Ran out of patch', state
> +                print('Ran out of patch', state)
>                   return None
>               return p
>           state = grabbers[state](p, line, input)
> diff --git a/logparser.py b/logparser.py
> index b375034..88293c5 100644
> --- a/logparser.py
> +++ b/logparser.py
> @@ -41,7 +41,7 @@ class LogPatchSplitter:
>       def __iter__(self):
>           return self
>   
> -    def next(self):
> +    def __next__(self):
>           patch = self.__grab_patch__()
>           if not patch:
>               raise StopIteration
> @@ -85,6 +85,6 @@ if __name__ == '__main__':
>       patches = LogPatchSplitter(sys.stdin)
>   
>       for patch in patches:
> -        print '---------- NEW PATCH ----------'
> +        print('---------- NEW PATCH ----------')
>           for line in patch:
> -            print line,
> +            print(line, end=' ')
> diff --git a/reports.py b/reports.py
> index d7a96bc..3e03e69 100644
> --- a/reports.py
> +++ b/reports.py
> @@ -69,11 +69,8 @@ def EndReport():
>   #
>   # Comparison and report generation functions.
>   #
> -def ComparePCount(h1, h2):
> -    return len(h2.patches) - len(h1.patches)
> -
>   def ReportByPCount(hlist, cscount):
> -    hlist.sort(ComparePCount)
> +    hlist.sort(key=lambda h: -len(h.patches))
>       count = 0
>       BeginReport('Developers with the most changesets')
>       for h in hlist:
> @@ -87,11 +84,8 @@ def ReportByPCount(hlist, cscount):
>               break
>       EndReport()
>               
> -def CompareLChanged(h1, h2):
> -    return h2.changed - h1.changed
> -
>   def ReportByLChanged(hlist, totalchanged):
> -    hlist.sort(CompareLChanged)
> +    hlist.sort(key=lambda h: -h.changed)
>       count = 0
>       BeginReport('Developers with the most changed lines')
>       for h in hlist:
> @@ -103,11 +97,8 @@ def ReportByLChanged(hlist, totalchanged):
>               break
>       EndReport()
>               
> -def CompareLRemoved(h1, h2):
> -    return (h2.removed - h2.added) - (h1.removed - h1.added)
> -
>   def ReportByLRemoved(hlist, totalremoved):
> -    hlist.sort(CompareLRemoved)
> +    hlist.sort(key=lambda h: h.added - h.removed)
>       count = 0
>       BeginReport('Developers with the most lines removed')
>       for h in hlist:
> @@ -121,11 +112,8 @@ def ReportByLRemoved(hlist, totalremoved):
>               break
>       EndReport()
>   
> -def CompareEPCount(e1, e2):
> -    return e2.count - e1.count
> -
>   def ReportByPCEmpl(elist, cscount):
> -    elist.sort(CompareEPCount)
> +    elist.sort(key=lambda e: -e.count)
>       count = 0
>       BeginReport('Top changeset contributors by employer')
>       for e in elist:
> @@ -137,11 +125,8 @@ def ReportByPCEmpl(elist, cscount):
>       EndReport()
>   
>   
> -def CompareELChanged(e1, e2):
> -    return e2.changed - e1.changed
> -
>   def ReportByELChanged(elist, totalchanged):
> -    elist.sort(CompareELChanged)
> +    elist.sort(key=lambda e: -e.changed)
>       count = 0
>       BeginReport('Top lines changed by employer')
>       for e in elist:
> @@ -154,11 +139,8 @@ def ReportByELChanged(elist, totalchanged):
>   
>   
>   
> -def CompareSOBs(h1, h2):
> -    return len(h2.signoffs) - len(h1.signoffs)
> -
>   def ReportBySOBs(hlist):
> -    hlist.sort(CompareSOBs)
> +    hlist.sort(key = lambda h: -len(h.signoffs))
>       totalsobs = 0
>       for h in hlist:
>           totalsobs += len(h.signoffs)
> @@ -176,11 +158,8 @@ def ReportBySOBs(hlist):
>   #
>   # Reviewer reporting.
>   #
> -def CompareRevs(h1, h2):
> -    return len(h2.reviews) - len(h1.reviews)
> -
>   def ReportByRevs(hlist):
> -    hlist.sort(CompareRevs)
> +    hlist.sort(key=lambda h: -len(h.reviews))
>       totalrevs = 0
>       for h in hlist:
>           totalrevs += len(h.reviews)
> @@ -198,11 +177,8 @@ def ReportByRevs(hlist):
>   #
>   # tester reporting.
>   #
> -def CompareTests(h1, h2):
> -    return len(h2.tested) - len(h1.tested)
> -
>   def ReportByTests(hlist):
> -    hlist.sort(CompareTests)
> +    hlist.sort(key=lambda h: -len(h.tested))
>       totaltests = 0
>       for h in hlist:
>           totaltests += len(h.tested)
> @@ -217,11 +193,8 @@ def ReportByTests(hlist):
>               break
>       EndReport()
>   
> -def CompareTestCred(h1, h2):
> -    return h2.testcred - h1.testcred
> -
>   def ReportByTestCreds(hlist):
> -    hlist.sort(CompareTestCred)
> +    hlist.sort(key=lambda h: -h.testcred)
>       totaltests = 0
>       for h in hlist:
>           totaltests += h.testcred
> @@ -240,11 +213,8 @@ def ReportByTestCreds(hlist):
>   #
>   # Reporter reporting.
>   #
> -def CompareReports(h1, h2):
> -    return len(h2.reports) - len(h1.reports)
> -
>   def ReportByReports(hlist):
> -    hlist.sort(CompareReports)
> +    hlist.sort(key=lambda h: -len(h.reports))
>       totalreps = 0
>       for h in hlist:
>           totalreps += len(h.reports)
> @@ -259,11 +229,8 @@ def ReportByReports(hlist):
>               break
>       EndReport()
>   
> -def CompareRepCred(h1, h2):
> -    return h2.repcred - h1.repcred
> -
>   def ReportByRepCreds(hlist):
> -    hlist.sort(CompareRepCred)
> +    hlist.sort(key=lambda h: -h.repcred)
>       totalreps = 0
>       for h in hlist:
>           totalreps += h.repcred
> @@ -280,14 +247,11 @@ def ReportByRepCreds(hlist):
>   #
>   # Versions.
>   #
> -def CompareVersionCounts(h1, h2):
> -    if h1.versions and h2.versions:
> -        return len(h2.versions) - len(h1.versions)
> -    if h2.versions:
> -        return 1
> -    if h1.versions:
> -        return -1
> -    return 0
> +def VersionCount(h):
> +    if h.versions:
> +        return len(h.versions)
> +    else:
> +        return 0
>   
>   def MissedVersions(hv, allv):
>       missed = [v for v in allv if v not in hv]
> @@ -295,7 +259,7 @@ def MissedVersions(hv, allv):
>       return ' '.join(missed)
>   
>   def ReportVersions(hlist):
> -    hlist.sort(CompareVersionCounts)
> +    hlist.sort(key=lambda h: -VersionCount(h))
>       BeginReport('Developers represented in the most kernel versions')
>       count = 0
>       allversions = hlist[0].versions
> @@ -307,11 +271,8 @@ def ReportVersions(hlist):
>       EndReport()
>   
>   
> -def CompareESOBs(e1, e2):
> -    return e2.sobs - e1.sobs
> -
>   def ReportByESOBs(elist):
> -    elist.sort(CompareESOBs)
> +    elist.sort(key=lambda e: -e.sobs)
>       totalsobs = 0
>       for e in elist:
>           totalsobs += e.sobs
> @@ -325,11 +286,8 @@ def ReportByESOBs(elist):
>               break
>       EndReport()
>      
> -def CompareHackers(e1, e2):
> -    return len(e2.hackers) - len(e1.hackers)
> -
>   def ReportByEHackers(elist):
> -    elist.sort(CompareHackers)
> +    elist.sort(key=lambda e: -len(e.hackers))
>       totalhackers = 0
>       for e in elist:
>           totalhackers += len(e.hackers)
> @@ -375,7 +333,7 @@ def ReportUnknowns(hlist, cscount):
>       # mapping to (Unknown) is happening or not.
>       #
>       ulist = [ h for h in hlist if IsUnknown(h) ]
> -    ulist.sort(ComparePCount)
> +    ulist.sort(lambda h: len(h.patches))
>       count = 0
>       BeginReport('Developers with unknown affiliation')
>       for h in ulist:
> @@ -398,46 +356,46 @@ def ReportByFileType(hacker_list):
>           by_hacker = {}
>           for patch in h.patches:
>               # Get a summary by hacker
> -            for (filetype, (added, removed)) in patch.filetypes.iteritems():
> -                if by_hacker.has_key(filetype):
> +            for (filetype, (added, removed)) in patch.filetypes.items():
> +                if filetype in by_hacker:
>                       by_hacker[filetype][patch.ADDED] += added
>                       by_hacker[filetype][patch.REMOVED] += removed
>                   else:
>                       by_hacker[filetype] = [added, removed]
>   
>                   # Update the totals
> -                if total.has_key(filetype):
> +                if filetype in total:
>                       total[filetype][patch.ADDED] += added
>                       total[filetype][patch.REMOVED] += removed
>                   else:
>                       total[filetype] = [added, removed, []]
>   
>           # Print a summary by hacker
> -        print h.name
> -        for filetype, counters in by_hacker.iteritems():
> -            print '\t', filetype, counters
> +        print(h.name)
> +        for filetype, counters in by_hacker.items():
> +            print('\t', filetype, counters)
>               h_added = by_hacker[filetype][patch.ADDED]
>               h_removed = by_hacker[filetype][patch.REMOVED]
>               total[filetype][2].append([h.name, h_added, h_removed])
>   
>       # Print the global summary
>       BeginReport('Contributions by type and developers')
> -    for filetype, (added, removed, hackers) in total.iteritems():
> -        print filetype, added, removed
> +    for filetype, (added, removed, hackers) in total.items():
> +        print(filetype, added, removed)
>           for h, h_added, h_removed in hackers:
> -            print '\t%s: [%d, %d]' % (h, h_added, h_removed)
> +            print('\t%s: [%d, %d]' % (h, h_added, h_removed))
>   
>       # Print the very global summary
>       BeginReport('General contributions by type')
> -    for filetype, (added, removed, hackers) in total.iteritems():
> -        print filetype, added, removed
> +    for filetype, (added, removed, hackers) in total.items():
> +        print(filetype, added, removed)
>   
>   #
>   # The file access report is a special beast.
>   #
>   def FileAccessReport(name, accesses, total):
>       outf = open(name, 'w')
> -    files = accesses.keys()
> +    files = list(accesses.keys())
>       files.sort()
>       for file in files:
>           a = accesses[file]
> diff --git a/utils.py b/utils.py
> index 2b3be5d..9f17911 100644
> --- a/utils.py
> +++ b/utils.py
> @@ -21,7 +21,7 @@ class accumulator:
>               return default
>   
>       def append(self, key, item, unique = False):
> -        if unique and self._data.has_key(key) and \
> +        if unique and key in self._data and \
>              item in self._data[key]:
>               return
>           try:
> @@ -30,7 +30,7 @@ class accumulator:
>               self._data[key] = [item]
>   
>       def keys(self):
> -        return self._data.keys()
> +        return list(self._data.keys())
>   
>       def __getitem__(self, key):
>           return self._data[key]
Tom Rini June 7, 2023, 5:18 p.m. UTC | #2
On Wed, Jun 07, 2023 at 07:57:54AM +0200, Heinrich Schuchardt wrote:

> This is the result of running 2to3 and manually converting the comparison
> functions to lambda expressions.
> 
> Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
> ---
>  ConfigFile.py |   4 +-
>  csvdump.py    |   4 +-
>  database.py   |  22 +++++------
>  gitdm         |  27 ++++++-------
>  gitlog.py     |   8 ++--
>  logparser.py  |   6 +--
>  reports.py    | 106 +++++++++++++++-----------------------------------
>  utils.py      |   4 +-
>  8 files changed, 70 insertions(+), 111 deletions(-)
> 

Is this really versus our current tree? It's already running on Python 3.
If there are specific "this is better Python" changes, we should send them
to Jon first and sync back.
diff mbox series

Patch

diff --git a/ConfigFile.py b/ConfigFile.py
index 3a1e208..e58c601 100644
--- a/ConfigFile.py
+++ b/ConfigFile.py
@@ -147,10 +147,10 @@  def ReadFileType (filename):
         m = regex_file_type.match (line)
         if not m or len (m.groups ()) != 2:
             ConfigFile.croak ('Funky file type line "%s"' % (line))
-        if not patterns.has_key (m.group (1)):
+        if m.group (1) not in patterns:
             patterns[m.group (1)] = []
         if m.group (1) not in order:
-            print '%s not found, appended to the last order' % m.group (1)
+            print('%s not found, appended to the last order' % m.group (1))
             order.append (m.group (1))
 
         patterns[m.group (1)].append (re.compile (m.group (2), re.IGNORECASE))
diff --git a/csvdump.py b/csvdump.py
index 9d1a65e..55e38a0 100644
--- a/csvdump.py
+++ b/csvdump.py
@@ -50,7 +50,7 @@  def store_patch(patch):
         ChangeSets.append([patch.commit, str(patch.date),
                            patch.email, domain, author, employer,
                            patch.added, patch.removed])
-        for (filetype, (added, removed)) in patch.filetypes.iteritems():
+        for (filetype, (added, removed)) in patch.filetypes.items():
             FileTypes.append([patch.commit, filetype, added, removed])
 
 
@@ -82,7 +82,7 @@  def OutputCSV (file):
     writer = csv.writer (file, quoting=csv.QUOTE_NONNUMERIC)
     writer.writerow (['Name', 'Email', 'Affliation', 'Date',
                       'Added', 'Removed', 'Changesets'])
-    for date, stat in PeriodCommitHash.items():
+    for date, stat in list(PeriodCommitHash.items()):
         # sanitise names " is common and \" sometimes too
         empl_name = stat.employer.name.replace ('"', '.').replace ('\\', '.')
         author_name = stat.name.replace ('"', '.').replace ('\\', '.')
diff --git a/database.py b/database.py
index bf13227..b267ed5 100644
--- a/database.py
+++ b/database.py
@@ -40,7 +40,7 @@  class Hacker:
                 for edate, empl in self.employer[i]:
                     if edate > date:
                         return empl
-        print 'OOPS.  ', self.name, self.employer, self.email, email, date
+        print('OOPS.  ', self.name, self.employer, self.email, email, date)
         return None # Should not happen
 
     def addpatch (self, patch):
@@ -124,11 +124,11 @@  def LookupStoreHacker(name, email, mapunknown = True):
 
 
 def AllHackers ():
-    return HackersByID.values ()
+    return list(HackersByID.values ())
 
 def DumpDB ():
     out = open ('database.dump', 'w')
-    names = HackersByName.keys ()
+    names = list(HackersByName.keys ())
     names.sort ()
     for name in names:
         h = HackersByName[name]
@@ -149,7 +149,7 @@  def DumpDB ():
 # push it backward through the changes we've already seen.
 #
 def ApplyFirstTag (tag):
-    for n in HackersByName.keys ():
+    for n in list(HackersByName.keys ()):
         if HackersByName[n].versions:
             HackersByName[n].versions = [tag]
 
@@ -185,7 +185,7 @@  def GetEmployer (name):
         return e
 
 def AllEmployers ():
-    return Employers.values ()
+    return list(Employers.values ())
 
 #
 # Certain obnoxious developers, who will remain nameless (because we
@@ -215,8 +215,8 @@  class VirtualEmployer (Employer):
         self.__init__ (name) # Reset counts just in case
 
     def store (self):
-        if Employers.has_key (self.name):
-            print Employers[self.name]
+        if self.name in Employers:
+            print(Employers[self.name])
             sys.stderr.write ('WARNING: Virtual empl %s overwrites another\n'
                               % (self.name))
         if len (self.splits) == 0:
@@ -235,7 +235,7 @@  class FileType:
         order = order or self.order
 
         for file_type in order:
-            if patterns.has_key (file_type):
+            if file_type in patterns:
                 for patt in patterns[file_type]:
                     if patt.search (filename):
                         return file_type
@@ -261,7 +261,7 @@  def MixVirtuals ():
 EmailAliases = { }
 
 def AddEmailAlias (variant, canonical):
-    if EmailAliases.has_key (variant):
+    if variant in EmailAliases:
         sys.stderr.write ('Duplicate email alias for %s\n' % (variant))
     EmailAliases[variant] = canonical
 
@@ -288,7 +288,7 @@  def AddEmailEmployerMapping (email, employer, end = nextyear):
         for i in range (0, len(l)):
             date, xempl = l[i]
             if date == end:  # probably both nextyear
-                print 'WARNING: duplicate email/empl for %s' % (email)
+                print('WARNING: duplicate email/empl for %s' % (email))
             if date > end:
                 l.insert (i, (end, empl))
                 return
@@ -305,7 +305,7 @@  def MapToEmployer (email, unknown = 0):
         pass
     namedom = email.split ('@')
     if len (namedom) < 2:
-        print 'Oops...funky email %s' % email
+        print('Oops...funky email %s' % email)
         return [(nextyear, GetEmployer ('Funky'))]
     s = namedom[1].split ('.')
     for dots in range (len (s) - 2, -1, -1):
diff --git a/gitdm b/gitdm
index 61318ad..f426cc7 100755
--- a/gitdm
+++ b/gitdm
@@ -1,4 +1,4 @@ 
-#!/usr/bin/pypy
+#!/usr/bin/python3
 #-*- coding:utf-8 -*-
 #
 
@@ -15,7 +15,8 @@ 
 
 import database, csvdump, ConfigFile, reports
 import getopt, datetime
-import os, re, sys, rfc822, string, os.path
+from email.utils import parsedate_tz
+import os, re, sys, string, os.path
 import logparser
 from patterns import patterns
 
@@ -108,7 +109,7 @@  def ParseOpts():
         elif opt[0] == '-p':
             CSVPrefix = opt[1]
         elif opt[0] == '-r':
-            print 'Filter on "%s"' % (opt[1])
+            print('Filter on "%s"' % (opt[1]))
             FileFilter = re.compile(opt[1])
         elif opt[0] == '-s':
             AuthorSOBs = 0
@@ -120,7 +121,7 @@  def ParseOpts():
             ReportUnknowns = True
         elif opt[0] == '-x':
             CSVFile = open(opt[1], 'w')
-            print "open output file " + opt[1] + "\n"
+            print("open output file " + opt[1] + "\n")
         elif opt [0] == '-w':
             Aggregate = 'week'
         elif opt [0] == '-y':
@@ -172,7 +173,7 @@  DateMap = { }
 
 def AddDateLines(date, lines):
     if lines > 1000000:
-        print 'Skip big patch (%d)' % lines
+        print('Skip big patch (%d)' % lines)
         return
     try:
         DateMap[date] += lines
@@ -180,7 +181,7 @@  def AddDateLines(date, lines):
         DateMap[date] = lines
 
 def PrintDateStats():
-    dates = DateMap.keys()
+    dates = list(DateMap.keys())
     dates.sort()
     total = 0
     datef = open('datelc.csv', 'w')
@@ -195,7 +196,7 @@  def PrintDateStats():
 # Let's slowly try to move some smarts into this class.
 #
 class patch:
-    (ADDED, REMOVED) = range(2)
+    (ADDED, REMOVED) = list(range(2))
 
     def __init__(self, commit):
         self.commit = commit
@@ -219,7 +220,7 @@  class patch:
         self.reports.append(reporter)
 
     def addfiletype(self, filetype, added, removed):
-        if self.filetypes.has_key(filetype):
+        if filetype in self.filetypes:
             self.filetypes[filetype][self.ADDED] += added
             self.filetypes[filetype][self.REMOVED] += removed
         else:
@@ -330,7 +331,7 @@  def grabpatch(logpatch):
         #
         m = patterns['date'].match(Line)
         if m:
-            dt = rfc822.parsedate(m.group(2))
+            dt = parsedate_tz(m.group(2))
             p.date = datetime.date(dt[0], dt[1], dt[2])
             if p.date > Today:
                 sys.stderr.write('Funky date: %s\n' % p.date)
@@ -389,7 +390,7 @@  def GripeAboutAuthorName(name):
     if name in GripedAuthorNames:
         return
     GripedAuthorNames.append(name)
-    print '%s is an author name, probably not what you want' % (name)
+    print('%s is an author name, probably not what you want' % (name))
 
 def ApplyFileFilter(line, ignore):
     #
@@ -462,14 +463,14 @@  TotalChanged = TotalAdded = TotalRemoved = 0
 #
 # Snarf changesets.
 #
-print >> sys.stderr, 'Grabbing changesets...\r',
+print('Grabbing changesets...\r', end='', file=sys.stderr)
 
 patches = logparser.LogPatchSplitter(sys.stdin)
 printcount = CSCount = 0
 
 for logpatch in patches:
     if (printcount % 50) == 0:
-        print >> sys.stderr, 'Grabbing changesets...%d\r' % printcount,
+        print('Grabbing changesets...%d\r' % printcount, end='', file=sys.stderr)
     printcount += 1
 
     # We want to ignore commits on svn tags since in Subversion
@@ -528,7 +529,7 @@  for logpatch in patches:
         CSCount += 1
     csvdump.AccumulatePatch(p, Aggregate)
     csvdump.store_patch(p)
-print >> sys.stderr, 'Grabbing changesets...done       '
+print('Grabbing changesets...done       ', file=sys.stderr)
 
 if DumpDB:
     database.DumpDB()
diff --git a/gitlog.py b/gitlog.py
index 71efee1..4b1c5d6 100644
--- a/gitlog.py
+++ b/gitlog.py
@@ -61,7 +61,7 @@  S_DONE = 5
 def get_header(patch, line, input):
     if line == '':
         if patch.author == '':
-            print 'Funky auth line in', patch.commit
+            print('Funky auth line in', patch.commit)
             patch.author = database.LookupStoreHacker('Unknown',
                                                       'unknown@hacker.net')
         return S_DESC
@@ -78,7 +78,7 @@  def get_header(patch, line, input):
 
 def get_desc(patch, line, input):
     if not line:
-        print 'Missing desc in', patch.commit
+        print('Missing desc in', patch.commit)
         return S_CHANGELOG
     patch.desc = line
     line = getline(input)
@@ -188,7 +188,7 @@  def grabpatch(input):
         return None
     m = patterns['commit'].match(line)
     if not m:
-        print 'noncommit', line
+        print('noncommit', line)
         return None
     p = patch(m.group(1))
     state = S_HEADER
@@ -199,7 +199,7 @@  def grabpatch(input):
         line = getline(input)
         if line is None:
             if state != S_NUMSTAT:
-                print 'Ran out of patch', state
+                print('Ran out of patch', state)
                 return None
             return p
         state = grabbers[state](p, line, input)
diff --git a/logparser.py b/logparser.py
index b375034..88293c5 100644
--- a/logparser.py
+++ b/logparser.py
@@ -41,7 +41,7 @@  class LogPatchSplitter:
     def __iter__(self):
         return self
 
-    def next(self):
+    def __next__(self):
         patch = self.__grab_patch__()
         if not patch:
             raise StopIteration
@@ -85,6 +85,6 @@  if __name__ == '__main__':
     patches = LogPatchSplitter(sys.stdin)
 
     for patch in patches:
-        print '---------- NEW PATCH ----------'
+        print('---------- NEW PATCH ----------')
         for line in patch:
-            print line,
+            print(line, end=' ')
diff --git a/reports.py b/reports.py
index d7a96bc..3e03e69 100644
--- a/reports.py
+++ b/reports.py
@@ -69,11 +69,8 @@  def EndReport():
 #
 # Comparison and report generation functions.
 #
-def ComparePCount(h1, h2):
-    return len(h2.patches) - len(h1.patches)
-
 def ReportByPCount(hlist, cscount):
-    hlist.sort(ComparePCount)
+    hlist.sort(key=lambda h: -len(h.patches))
     count = 0
     BeginReport('Developers with the most changesets')
     for h in hlist:
@@ -87,11 +84,8 @@  def ReportByPCount(hlist, cscount):
             break
     EndReport()
             
-def CompareLChanged(h1, h2):
-    return h2.changed - h1.changed
-
 def ReportByLChanged(hlist, totalchanged):
-    hlist.sort(CompareLChanged)
+    hlist.sort(key=lambda h: -h.changed)
     count = 0
     BeginReport('Developers with the most changed lines')
     for h in hlist:
@@ -103,11 +97,8 @@  def ReportByLChanged(hlist, totalchanged):
             break
     EndReport()
             
-def CompareLRemoved(h1, h2):
-    return (h2.removed - h2.added) - (h1.removed - h1.added)
-
 def ReportByLRemoved(hlist, totalremoved):
-    hlist.sort(CompareLRemoved)
+    hlist.sort(key=lambda h: h.added - h.removed)
     count = 0
     BeginReport('Developers with the most lines removed')
     for h in hlist:
@@ -121,11 +112,8 @@  def ReportByLRemoved(hlist, totalremoved):
             break
     EndReport()
 
-def CompareEPCount(e1, e2):
-    return e2.count - e1.count
-
 def ReportByPCEmpl(elist, cscount):
-    elist.sort(CompareEPCount)
+    elist.sort(key=lambda e: -e.count)
     count = 0
     BeginReport('Top changeset contributors by employer')
     for e in elist:
@@ -137,11 +125,8 @@  def ReportByPCEmpl(elist, cscount):
     EndReport()
 
 
-def CompareELChanged(e1, e2):
-    return e2.changed - e1.changed
-
 def ReportByELChanged(elist, totalchanged):
-    elist.sort(CompareELChanged)
+    elist.sort(key=lambda e: -e.changed)
     count = 0
     BeginReport('Top lines changed by employer')
     for e in elist:
@@ -154,11 +139,8 @@  def ReportByELChanged(elist, totalchanged):
 
 
 
-def CompareSOBs(h1, h2):
-    return len(h2.signoffs) - len(h1.signoffs)
-
 def ReportBySOBs(hlist):
-    hlist.sort(CompareSOBs)
+    hlist.sort(key = lambda h: -len(h.signoffs))
     totalsobs = 0
     for h in hlist:
         totalsobs += len(h.signoffs)
@@ -176,11 +158,8 @@  def ReportBySOBs(hlist):
 #
 # Reviewer reporting.
 #
-def CompareRevs(h1, h2):
-    return len(h2.reviews) - len(h1.reviews)
-
 def ReportByRevs(hlist):
-    hlist.sort(CompareRevs)
+    hlist.sort(key=lambda h: -len(h.reviews))
     totalrevs = 0
     for h in hlist:
         totalrevs += len(h.reviews)
@@ -198,11 +177,8 @@  def ReportByRevs(hlist):
 #
 # tester reporting.
 #
-def CompareTests(h1, h2):
-    return len(h2.tested) - len(h1.tested)
-
 def ReportByTests(hlist):
-    hlist.sort(CompareTests)
+    hlist.sort(key=lambda h: -len(h.tested))
     totaltests = 0
     for h in hlist:
         totaltests += len(h.tested)
@@ -217,11 +193,8 @@  def ReportByTests(hlist):
             break
     EndReport()
 
-def CompareTestCred(h1, h2):
-    return h2.testcred - h1.testcred
-
 def ReportByTestCreds(hlist):
-    hlist.sort(CompareTestCred)
+    hlist.sort(key=lambda h: -h.testcred)
     totaltests = 0
     for h in hlist:
         totaltests += h.testcred
@@ -240,11 +213,8 @@  def ReportByTestCreds(hlist):
 #
 # Reporter reporting.
 #
-def CompareReports(h1, h2):
-    return len(h2.reports) - len(h1.reports)
-
 def ReportByReports(hlist):
-    hlist.sort(CompareReports)
+    hlist.sort(key=lambda h: -len(h.reports))
     totalreps = 0
     for h in hlist:
         totalreps += len(h.reports)
@@ -259,11 +229,8 @@  def ReportByReports(hlist):
             break
     EndReport()
 
-def CompareRepCred(h1, h2):
-    return h2.repcred - h1.repcred
-
 def ReportByRepCreds(hlist):
-    hlist.sort(CompareRepCred)
+    hlist.sort(key=lambda h: -h.repcred)
     totalreps = 0
     for h in hlist:
         totalreps += h.repcred
@@ -280,14 +247,11 @@  def ReportByRepCreds(hlist):
 #
 # Versions.
 #
-def CompareVersionCounts(h1, h2):
-    if h1.versions and h2.versions:
-        return len(h2.versions) - len(h1.versions)
-    if h2.versions:
-        return 1
-    if h1.versions:
-        return -1
-    return 0
+def VersionCount(h):
+    if h.versions:
+        return len(h.versions)
+    else:
+        return 0
 
 def MissedVersions(hv, allv):
     missed = [v for v in allv if v not in hv]
@@ -295,7 +259,7 @@  def MissedVersions(hv, allv):
     return ' '.join(missed)
 
 def ReportVersions(hlist):
-    hlist.sort(CompareVersionCounts)
+    hlist.sort(key=lambda h: -VersionCount(h))
     BeginReport('Developers represented in the most kernel versions')
     count = 0
     allversions = hlist[0].versions
@@ -307,11 +271,8 @@  def ReportVersions(hlist):
     EndReport()
 
 
-def CompareESOBs(e1, e2):
-    return e2.sobs - e1.sobs
-
 def ReportByESOBs(elist):
-    elist.sort(CompareESOBs)
+    elist.sort(key=lambda e: -e.sobs)
     totalsobs = 0
     for e in elist:
         totalsobs += e.sobs
@@ -325,11 +286,8 @@  def ReportByESOBs(elist):
             break
     EndReport()
    
-def CompareHackers(e1, e2):
-    return len(e2.hackers) - len(e1.hackers)
-
 def ReportByEHackers(elist):
-    elist.sort(CompareHackers)
+    elist.sort(key=lambda e: -len(e.hackers))
     totalhackers = 0
     for e in elist:
         totalhackers += len(e.hackers)
@@ -375,7 +333,7 @@  def ReportUnknowns(hlist, cscount):
     # mapping to (Unknown) is happening or not.
     #
     ulist = [ h for h in hlist if IsUnknown(h) ]
-    ulist.sort(ComparePCount)
+    ulist.sort(key=lambda h: -len(h.patches))
     count = 0
     BeginReport('Developers with unknown affiliation')
     for h in ulist:
@@ -398,46 +356,46 @@  def ReportByFileType(hacker_list):
         by_hacker = {}
         for patch in h.patches:
             # Get a summary by hacker
-            for (filetype, (added, removed)) in patch.filetypes.iteritems():
-                if by_hacker.has_key(filetype):
+            for (filetype, (added, removed)) in patch.filetypes.items():
+                if filetype in by_hacker:
                     by_hacker[filetype][patch.ADDED] += added
                     by_hacker[filetype][patch.REMOVED] += removed
                 else:
                     by_hacker[filetype] = [added, removed]
 
                 # Update the totals
-                if total.has_key(filetype):
+                if filetype in total:
                     total[filetype][patch.ADDED] += added
                     total[filetype][patch.REMOVED] += removed
                 else:
                     total[filetype] = [added, removed, []]
 
         # Print a summary by hacker
-        print h.name
-        for filetype, counters in by_hacker.iteritems():
-            print '\t', filetype, counters
+        print(h.name)
+        for filetype, counters in by_hacker.items():
+            print('\t', filetype, counters)
             h_added = by_hacker[filetype][patch.ADDED]
             h_removed = by_hacker[filetype][patch.REMOVED]
             total[filetype][2].append([h.name, h_added, h_removed])
 
     # Print the global summary
     BeginReport('Contributions by type and developers')
-    for filetype, (added, removed, hackers) in total.iteritems():
-        print filetype, added, removed
+    for filetype, (added, removed, hackers) in total.items():
+        print(filetype, added, removed)
         for h, h_added, h_removed in hackers:
-            print '\t%s: [%d, %d]' % (h, h_added, h_removed)
+            print('\t%s: [%d, %d]' % (h, h_added, h_removed))
 
     # Print the very global summary
     BeginReport('General contributions by type')
-    for filetype, (added, removed, hackers) in total.iteritems():
-        print filetype, added, removed
+    for filetype, (added, removed, hackers) in total.items():
+        print(filetype, added, removed)
 
 #
 # The file access report is a special beast.
 #
 def FileAccessReport(name, accesses, total):
     outf = open(name, 'w')
-    files = accesses.keys()
+    files = list(accesses.keys())
     files.sort()
     for file in files:
         a = accesses[file]
diff --git a/utils.py b/utils.py
index 2b3be5d..9f17911 100644
--- a/utils.py
+++ b/utils.py
@@ -21,7 +21,7 @@  class accumulator:
             return default
 
     def append(self, key, item, unique = False):
-        if unique and self._data.has_key(key) and \
+        if unique and key in self._data and \
            item in self._data[key]:
             return
         try:
@@ -30,7 +30,7 @@  class accumulator:
             self._data[key] = [item]
 
     def keys(self):
-        return self._data.keys()
+        return list(self._data.keys())
 
     def __getitem__(self, key):
         return self._data[key]