{"id":369465,"url":"http://patchwork.ozlabs.org/api/patches/369465/?format=json","web_url":"http://patchwork.ozlabs.org/project/patchwork/patch/1405304492.528839.24449452940.1.gpush@pablo/","project":{"id":16,"url":"http://patchwork.ozlabs.org/api/projects/16/?format=json","name":"Patchwork","link_name":"patchwork","list_id":"patchwork.lists.ozlabs.org","list_email":"patchwork@lists.ozlabs.org","web_url":"http://jk.ozlabs.org/projects/patchwork/","scm_url":"git://github.com/getpatchwork/patchwork","webscm_url":"https://github.com/getpatchwork/patchwork","list_archive_url":"","list_archive_url_format":"","commit_url_format":""},"msgid":"<1405304492.528839.24449452940.1.gpush@pablo>","list_archive_url":null,"date":"2014-07-14T02:21:32","name":"parsemail: Fallback to common charsets when charset is None or x-unknown","commit_ref":null,"pull_url":null,"state":"accepted","archived":false,"hash":"3db5ed6b61159d877eae4c23ed5dffd0663c2ff8","submitter":{"id":1,"url":"http://patchwork.ozlabs.org/api/people/1/?format=json","name":"Jeremy Kerr","email":"jk@ozlabs.org"},"delegate":null,"mbox":"http://patchwork.ozlabs.org/project/patchwork/patch/1405304492.528839.24449452940.1.gpush@pablo/mbox/","series":[],"comments":"http://patchwork.ozlabs.org/api/patches/369465/comments/","check":"pending","checks":"http://patchwork.ozlabs.org/api/patches/369465/checks/","tags":{},"related":[],"headers":{"Return-Path":"<patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org>","X-Original-To":["incoming@patchwork.ozlabs.org","patchwork@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@bilbo.ozlabs.org","patchwork@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 902D21400BB\n\tfor <incoming@patchwork.ozlabs.org>;\n\tMon, 14 Jul 2014 12:22:06 +1000 (EST)","from ozlabs.org (ozlabs.org [103.22.144.67])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 72C461A0156\n\tfor <incoming@patchwork.ozlabs.org>;\n\tMon, 14 Jul 2014 12:22:06 +1000 (EST)","from ozlabs.org (ozlabs.org [103.22.144.67])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id E87C51A0004\n\tfor <patchwork@lists.ozlabs.org>;\n\tMon, 14 Jul 2014 12:22:04 +1000 (EST)","by ozlabs.org (Postfix, from userid 1023)\n\tid D135E1400F0; Mon, 14 Jul 2014 12:22:04 +1000 (EST)"],"MIME-Version":"1.0","Subject":"[PATCH] parsemail: Fallback to common charsets when charset is None\n\tor x-unknown","Message-Id":"<1405304492.528839.24449452940.1.gpush@pablo>","In-Reply-To":"<53C33BF6.8060900@ozlabs.org>","To":"Siddhesh Poyarekar <siddhesh@redhat.com>","From":"Jeremy Kerr <jk@ozlabs.org>","Date":"Mon, 14 Jul 2014 10:21:32 +0800","Cc":"patchwork@lists.ozlabs.org","X-BeenThere":"patchwork@lists.ozlabs.org","X-Mailman-Version":"2.1.16","Precedence":"list","List-Id":"Patchwork development <patchwork.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/patchwork>,\n\t<mailto:patchwork-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/patchwork/>","List-Post":"<mailto:patchwork@lists.ozlabs.org>","List-Help":"<mailto:patchwork-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/patchwork>,\n\t<mailto:patchwork-request@lists.ozlabs.org?subject=subscribe>","Content-Type":"text/plain; charset=\"utf-8\"","Content-Transfer-Encoding":"base64","Errors-To":"patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org","Sender":"\"Patchwork\"\n\t<patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org>"},"content":"From: Siddhesh Poyarekar <siddhesh@redhat.com>\n\nWe recently encountered a case in our glibc patchwork instance on\nsourceware, where a patch was dropped because it had x-unknown\ncharset.\n\nThis change adds a fallback on a set of encodings (instead of just\nutf-8) when the charset is not mentioned or if it is set as x-unknown.\n\nMinor changes and testcase by Jeremy Kerr <jk@ozlabs.org>\n\nSigned-off-by: Siddhesh Poyarekar <siddhesh@redhat.com>\nSigned-off-by: Jeremy Kerr <jk@ozlabs.org>\n\n---\n apps/patchwork/bin/parsemail.py                     |   40 ++++-\n apps/patchwork/tests/mail/0010-invalid-charset.mbox |   91 ++++++++++++\n apps/patchwork/tests/test_patchparser.py            |   11 +\n 3 files changed, 136 insertions(+), 6 deletions(-)","diff":"diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py\nindex b6eb97a..2a4866f 100755\n--- a/apps/patchwork/bin/parsemail.py\n+++ b/apps/patchwork/bin/parsemail.py\n@@ -24,6 +24,7 @@ import re\n import datetime\n import time\n import operator\n+import codecs\n from email import message_from_file\n try:\n     from email.header import Header, decode_header\n@@ -147,6 +148,13 @@ def find_pull_request(content):\n         return match.group(1)\n     return None\n \n+def try_decode(payload, charset):\n+    try:\n+        payload = unicode(payload, charset)\n+    except UnicodeDecodeError:\n+        return None\n+    return payload\n+\n def find_content(project, mail):\n     patchbuf = None\n     commentbuf = ''\n@@ -157,15 +165,35 @@ def find_content(project, mail):\n             continue\n \n         payload = part.get_payload(decode=True)\n-        charset = part.get_content_charset()\n         subtype = part.get_content_subtype()\n \n-        # if we don't have a charset, assume utf-8\n-        if charset is None:\n-            charset = 'utf-8'\n-\n         if not isinstance(payload, unicode):\n-            payload = unicode(payload, charset)\n+            charset = part.get_content_charset()\n+\n+            # Check that we have a charset that we understand. Otherwise,\n+            # ignore it and fallback to our standard set.\n+            if charset is not None:\n+                try:\n+                    codec = codecs.lookup(charset)\n+                except LookupError:\n+                    charset = None\n+\n+            # If there is no charset or if it is unknown, then try some common\n+            # charsets before we fail.\n+            if charset is None:\n+                try_charsets = ['utf-8', 'windows-1252', 'iso-8859-1']\n+            else:\n+                try_charsets = [charset]\n+\n+            for cset in try_charsets:\n+                decoded_payload = try_decode(payload, cset)\n+                if decoded_payload is not None:\n+                    break\n+            payload = decoded_payload\n+\n+            # Could not find a valid decoded payload.  Fail.\n+            if payload is None:\n+                return (None, None)\n \n         if subtype in ['x-patch', 'x-diff']:\n             patchbuf = payload\ndiff --git a/apps/patchwork/tests/mail/0010-invalid-charset.mbox b/apps/patchwork/tests/mail/0010-invalid-charset.mbox\nnew file mode 100644\nindex 0000000..a8614ef\n--- /dev/null\n+++ b/apps/patchwork/tests/mail/0010-invalid-charset.mbox\n@@ -0,0 +1,91 @@\n+From libc-alpha-return-50517-siddhesh=redhat.com@sourceware.org Thu Jun  5 10:36:33 2014\n+Received: (qmail 11948 invoked by alias); 4 Jun 2014 17:51:01 -0000\n+Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm\n+List-Id: <libc-alpha.sourceware.org>\n+Sender: libc-alpha-owner@sourceware.org\n+Date: Wed, 4 Jun 2014 17:50:46 +0000\n+From: \"Joseph S. Myers\" <joseph@codesourcery.com>\n+To: <libc-alpha@sourceware.org>\n+Subject: Fix pow overflow in non-default rounding modes (bug 16315)\n+Message-ID: <Pine.LNX.4.64.1406041749420.3719@digraph.polyomino.org.uk>\n+MIME-Version: 1.0\n+Content-Type: multipart/mixed;\n+\tboundary=\"-1152306461-1522705971-1401904246=:3719\"\n+Content-Length: 24171\n+\n+---1152306461-1522705971-1401904246=:3719\n+Content-Type: text/plain; charset=\"none\"\n+Content-Transfer-Encoding: QUOTED-PRINTABLE\n+\n+This patch, relative to a tree with\n+<https://sourceware.org/ml/libc-alpha/2014-06/msg00076.html> applied,\n+fixes bug 16315, bad pow handling of overflow/underflow in non-default\n+rounding modes.  Tests of pow are duly converted to ALL_RM_TEST to run\n+all tests in all rounding modes.\n+\n+There are two main issues here.  First, various implementations\n+compute a negative result by negating a positive result, but this\n+yields inappropriate overflow / underflow values for directed\n+rounding, so either overflow / underflow results need recomputing in\n+the correct sign, or the relevant overflowing / underflowing operation\n+needs to be made to have a result of the correct sign.  Second, the\n+dbl-64 implementation sets FE_TONEAREST internally; in the overflow /\n+underflow case, the result needs recomputing in the original rounding\n+mode.\n+\n+Tested x86_64 and x86 and ulps updated accordingly.\n+\n+(auto-libm-test-out diffs omitted below.)\n+\n+2014-06-04  Joseph Myers  <joseph@codesourcery.com>\n+\n+=09[BZ #16315]\n+=09* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly\n+=09overflowing or underflowing operations take place with sign of\n+=09result.\n+=09* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.\n+=09* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.\n+=09* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.\n+=09(__ieee754_pow): Recompute overflowing and underflowing results in\n+=09original rounding mode.\n+=09* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.\n+=09(__powl_helper): Allow negative argument X and scale negated value\n+=09as needed.  Avoid passing value outside [-1, 1] to f2xm1.\n+=09* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly\n+=09overflowing or underflowing operations take place with sign of\n+=09result.\n+=09* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]:\n+=09Include <math.h>.\n+=09* math/auto-libm-test-in: Add more tests of pow.\n+=09* math/auto-libm-test-out: Regenerated.\n+=09* math/libm-test.inc (pow_test): Use ALL_RM_TEST.\n+=09(pow_tonearest_test_data): Remove.\n+=09(pow_test_tonearest): Likewise.\n+=09(pow_towardzero_test_data): Likewise.\n+=09(pow_test_towardzero): Likewise.\n+=09(pow_downward_test_data): Likewise.\n+=09(pow_test_downward): Likewise.\n+=09(pow_upward_test_data): Likewise.\n+=09(pow_test_upward): Likewise.\n+=09(main): Don't call removed functions.\n+=09* sysdeps/i386/fpu/libm-test-ulps: Update.\n+=09* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.\n+\n+diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/mult=\n+iarch/e_pow.c\n+index a740b6c..433cce0 100644\n+--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c\n++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c\n+@@ -1,5 +1,6 @@\n+ #ifdef HAVE_FMA4_SUPPORT\n+ # include <init-arch.h>\n++# include <math.h>\n+ # include <math_private.h>\n+=20\n+ extern double __ieee754_pow_sse2 (double, double);\n+\n+--=20\n+Joseph S. Myers\n+joseph@codesourcery.com\n+---1152306461-1522705971-1401904246=:3719--\n+\ndiff --git a/apps/patchwork/tests/test_patchparser.py b/apps/patchwork/tests/test_patchparser.py\nindex 0496a69..d9a24c1 100644\n--- a/apps/patchwork/tests/test_patchparser.py\n+++ b/apps/patchwork/tests/test_patchparser.py\n@@ -422,6 +422,17 @@ class CVSFormatPatchTest(MBoxPatchTest):\n         self.assertTrue(comment is not None)\n         self.assertTrue(patch.content.startswith('Index'))\n \n+class CharsetFallbackPatchTest(MBoxPatchTest):\n+    \"\"\" Test mail with and invalid charset name, and check that we can parse\n+        with one of the fallback encodings\"\"\"\n+\n+    mail_file = '0010-invalid-charset.mbox'\n+\n+    def testPatch(self):\n+        (patch, comment) = find_content(self.project, self.mail)\n+        self.assertTrue(patch is not None)\n+        self.assertTrue(comment is not None)\n+\n class DelegateRequestTest(TestCase):\n     patch_filename = '0001-add-line.patch'\n     msgid = '<1@example.com>'\n","prefixes":[]}