Message ID | BLU437-SMTP14C341A24DA164D29133DFA3F40@phx.gbl |
---|---|
State | Not Applicable |
Headers | show |
> > I don't have the offending patch on hand, but isn't the issue with the > headers. If so, would something like the below do (I haven't tested > it - there could be typos). This is a bit more general - it deals with UTF-8 characters in the body for Py3 as well, which is broken even with the other patch I sent. I'll send some tests which will make it a bit clearer. Regards, Daniel > > I'll review this if not. > > Stephen > > diff --git a/patchwork/parser.py b/patchwork/parser.py > index 1805df8..7917e97 100644 > --- a/patchwork/parser.py > +++ b/patchwork/parser.py > @@ -21,7 +21,8 @@ > > import codecs > import datetime > -from email.header import Header, decode_header > +from email.header import decode_header > +from email.header import make_header > from email.utils import parsedate_tz, mktime_tz > from fnmatch import fnmatch > from functools import reduce > @@ -155,10 +156,10 @@ def find_date(mail): > > > def find_headers(mail): > - return reduce(operator.__concat__, > - ['%s: %s\n' % (k, Header(v, header_name=k, > - continuation_ws='\t').encode()) > - for (k, v) in list(mail.items())]) > + headers = {key: decode_header(value) for key, value in list(mail.items())} > + return '\n'.join(['%s: %s' % (key, make_header(value[0], header_name=key, > + continuation_wd='\t')) > + for key, value in headers]) > > > def find_references(mail):
Hi, 2016-09-20 00:08, Stephen Finucane: > On 20 Sep 01:22, Daniel Axtens wrote: > > So, umm, I went ahead and had a crack at this. > > > > It turns out this is hideously difficult to get right. But this plus my > > other patch to fix Thomas' problem should have things working on Py2 and > > Py3 with this series. Thanks for taking care of this. [...] > I don't have the offending patch on hand, but isn't the issue with the > headers. If so, would something like the below do (I haven't tested > it - there could be typos). [...] Would it be possible to fix this bug in the stable branch as well, please? Thanks a lot.
Hi Stephen, > def find_headers(mail): > - return reduce(operator.__concat__, > - ['%s: %s\n' % (k, Header(v, header_name=k, > - continuation_ws='\t').encode()) > - for (k, v) in list(mail.items())]) > + headers = {key: decode_header(value) for key, value in list(mail.items())} > + return '\n'.join(['%s: %s' % (key, make_header(value[0], header_name=key, > + continuation_wd='\t')) > + for key, value in headers]) This works beautifully in Python3. In Python2, not so much: patchwork@652f47a766fc:~/patchwork$ python2 Python 2.7.12 (default, Jul 1 2016, 15:12:24) [GCC 5.4.0 20160609] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> from email.header import decode_header, make_header >>> snowman_header = u'Snowman: \u2603' >>> snowman_utf_8 = snowman_header.encode('utf-8') >>> print(snowman_utf_8) Snowman: ☃ >>> print(decode_header(snowman_header)) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/lib/python2.7/email/header.py", line 73, in decode_header header = str(header) UnicodeEncodeError: 'ascii' codec can't encode character u'\u2603' in position 9: ordinal not in range(128) >>> print(decode_header(snowman_utf_8)) [('Snowman: \xe2\x98\x83', None)] >>> print(make_header(decode_header(snowman_utf_8))) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/lib/python2.7/email/header.py", line 139, in make_header h.append(s, charset) File "/usr/lib/python2.7/email/header.py", line 267, in append ustr = unicode(s, incodec, errors) UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 9: ordinal not in range(128) >>> So I suppose some half-breed of the two approaches is the way to go. Regards, Daniel
diff --git a/patchwork/parser.py b/patchwork/parser.py index 1805df8..7917e97 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -21,7 +21,8 @@ import codecs import datetime -from email.header import Header, decode_header +from email.header import decode_header +from email.header import make_header from email.utils import parsedate_tz, mktime_tz from fnmatch import fnmatch from functools import reduce @@ -155,10 +156,10 @@ def find_date(mail): def find_headers(mail): - return reduce(operator.__concat__, - ['%s: %s\n' % (k, Header(v, header_name=k, - continuation_ws='\t').encode()) - for (k, v) in list(mail.items())]) + headers = {key: decode_header(value) for key, value in list(mail.items())} + return '\n'.join(['%s: %s' % (key, make_header(value[0], header_name=key, + continuation_wd='\t')) + for key, value in headers]) def find_references(mail):