commit ede49cf2cb17ecf08e81290bb475c700b2314a67
Author: Guilherme Salgado <guilherme.salgado@linaro.org>
Date: Fri Mar 25 15:59:28 2011 -0300
Adds a script which goes through all registered projects looking for patches that have been committed already
It does that by checking out the project's source code from its VCS of choice
(currently only git is supported, though), scanning the commits there and
comparing them to the patches in Patchwork.
new file mode 100755
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+
+import _pythonpath
+from patchwork.models import Patch, Project, State
+from patchwork.utils import (
+ ensure_source_checkout_for_project, get_hashes_for_commits)
+
+
+for project in Project.objects.all():
+ if project.source_tree is None:
+ continue
+
+ print "\n"
+ print "="*80
+ print "Scanning commits of %s" % project.name
+ print "="*80
+
+ try:
+ root = ensure_source_checkout_for_project(project)
+ except AssertionError:
+ print ("Skipping %s as we couldn't get a source checkout" %
+ project.name)
+ continue
+
+ hashes = get_hashes_for_commits(
+ root, start_at=project.last_seen_commit_ref)
+ for commit_id, patch_hash in hashes:
+ # There may be multiple patches with the same hash. That's usually
+ # the case when a second version of a patch series is submitted
+ # and some of the patches in the series are identical in both
+ # series.
+ for patch in Patch.objects.filter(project=project, hash=patch_hash):
+ patch.state = State.objects.get(name='Accepted')
+ patch.commit_ref = commit_id
+ print patch, patch.state
+
+ project.last_seen_commit_ref = commit_id
+ project.save()
@@ -23,3 +23,4 @@ from patchwork.tests.bundles import *
from patchwork.tests.mboxviews import *
from patchwork.tests.updates import *
from patchwork.tests.filters import *
+from patchwork.tests.test_utils import *
new file mode 100644
@@ -0,0 +1,83 @@
+
+import atexit
+import shutil
+import tempfile
+from time import time
+from unittest import TestCase
+
+from dulwich.objects import Blob, Commit, parse_timezone, Tree
+from dulwich.repo import Repo
+
+from patchwork.utils import get_hashes_for_commits, get_commits_to_parse
+
+
+class TestGitRepoScanning(TestCase):
+ """Tests for helper functions that scan commits on a git repo."""
+
+ def test_get_commits_to_parse(self):
+ repo = self.create_git_repo()
+ commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+ commit2 = self.add_file_and_commit(repo, 'bar', 'Content2', commit)
+ commit3 = self.add_file_and_commit(repo, 'baz', 'Content3', commit2)
+ self.assertEqual(
+ [commit.id, commit2.id, commit3.id],
+ get_commits_to_parse(repo.path, start_at=None))
+
+ def test_get_hashes_for_commits(self):
+ repo = self.create_git_repo()
+ commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+ commit2 = self.add_file_and_commit(repo, 'bar', 'Content2', commit)
+ self.assertEqual(
+ [(commit.id, 'a082b23263e8e3366bf3c387ffdcb8b21658e3ad'),
+ (commit2.id, '5c010402c5673981ee3e1712e6a037de3ff9cae4')],
+ list(get_hashes_for_commits(repo.path, start_at=None)))
+
+ def test_get_hashes_for_commits_with_single_empty_patch(self):
+ # Here the repo has only one commit which just adds an empty file, so
+ # parse_patch() returns returns None and we don't have any hash to
+ # check. XXX: Maybe parse_patch() should be fixed to not return None
+ # in these cases?
+ repo = self.create_git_repo()
+ commit = self.add_file_and_commit(repo, 'foo', '')
+ self.assertEqual(
+ [], list(get_hashes_for_commits(repo.path, start_at=None)))
+
+ def test_get_hashes_for_commits_starting_at_specific_commit(self):
+ repo = self.create_git_repo()
+ commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+ commit2 = self.add_file_and_commit(repo, 'bar', 'Content2', commit)
+ commit3 = self.add_file_and_commit(repo, 'baz', 'Content3', commit2)
+ self.assertEqual(
+ [(commit3.id, '11d22fa0986b3bb341baa76b8a6a757a46a2f916')],
+ list(get_hashes_for_commits(repo.path, start_at=commit2.id)))
+
+ def create_git_repo(self):
+ tmpdir = tempfile.mkdtemp()
+ atexit.register(shutil.rmtree, tmpdir)
+ repo = Repo.init(tmpdir)
+ return repo
+
+ def add_file_and_commit(self, repo, filename, data, parent=None):
+ blob = Blob.from_string(data)
+ parents = []
+ tree = Tree()
+ if parent is not None:
+ tree = repo[parent.tree]
+ parents = [parent.id]
+ tree.add(0100644, filename, blob.id)
+ commit = Commit()
+ commit.tree = tree.id
+ author = 'You <you@example.com>'
+ commit.author = commit.committer = author
+ commit.commit_time = commit.author_time = int(time())
+ tz = parse_timezone('-0200')[0]
+ commit.commit_timezone = commit.author_timezone = tz
+ commit.encoding = "UTF-8"
+ commit.message = "A commit"
+ commit.parents = parents
+ object_store = repo.object_store
+ object_store.add_object(blob)
+ object_store.add_object(tree)
+ object_store.add_object(commit)
+ repo.refs['refs/heads/master'] = commit.id
+ return commit
@@ -17,8 +17,12 @@
# along with Patchwork; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+import chardet
+import os
+import subprocess
-from patchwork.models import Bundle, Project, BundlePatch
+from patchwork.parser import hash_patch, parse_patch
+from patchwork.models import Bundle, BundlePatch
from django.shortcuts import get_object_or_404
def get_patch_ids(d, prefix = 'patch_id'):
@@ -137,3 +141,57 @@ def set_bundle(user, project, action, data, patches, context):
bundle.save()
return []
+
+
+def ensure_source_checkout_for_project(project):
+ forest = '/home/salgado/src' # This is where we store the trees we checkout
+ root = os.path.join(forest, project.linkname)
+ if not os.path.exists(root):
+ proc = subprocess.Popen(['git', 'clone', project.source_tree, root],
+ stdout=subprocess.PIPE)
+ else:
+ proc = subprocess.Popen(
+ ['git', 'fetch', '-f', project.source_tree], cwd=root,
+ stdout=subprocess.PIPE)
+ proc.communicate()
+ if proc.returncode != 0:
+ raise AssertionError("FIXME: raise a more specific assertion")
+ return root
+
+
+def get_commits_to_parse(root, start_at):
+ args = ['git', 'rev-list', '--reverse']
+ if start_at:
+ args.append('%s..HEAD' % start_at)
+ else:
+ args.extend(['--max-count=2000', 'HEAD'])
+ proc = subprocess.Popen(args, cwd=root, stdout=subprocess.PIPE)
+ stdout, stderr = proc.communicate()
+ return stdout.split()
+
+
+def get_hashes_for_commits(root, start_at):
+ for commit_id in get_commits_to_parse(root, start_at=start_at):
+ proc = subprocess.Popen(
+ ['git', 'show', commit_id], cwd=root, stdout=subprocess.PIPE)
+ stdout, stderr = proc.communicate()
+
+ try:
+ diff = stdout.decode('utf-8')
+ except UnicodeDecodeError:
+ try:
+ # XXX: Should either add chardet as a dependency or skip this
+ # block when it's not available.
+ # chardet.detect is rather slow so we only use it when we fail
+ # to decode from utf-8.
+ encoding = chardet.detect(stdout)['encoding']
+ diff = stdout.decode(encoding)
+ except UnicodeDecodeError:
+ print "Skipping %s as it doesn't seem to be utf-8" % commit_id
+ continue
+
+ patch, _ = parse_patch(diff)
+ # When commits just add files or change permissions the diff will be
+ # empty and thus parse_patch() will return None.
+ if patch is not None:
+ yield commit_id, hash_patch(patch).hexdigest()