🌐 AI搜索 & 代理 主页
Skip to content

Commit 79aa43a

Browse files
authored
gh-79986: Add parsing for References/In-Reply-To email headers (#137201)
This is a followup to 46d88a1 (#13397), which added parsing for Message-ID. Similar handling is needed for the other two identification headers.
1 parent 4eab90f commit 79aa43a

File tree

5 files changed

+137
-0
lines changed

5 files changed

+137
-0
lines changed

Lib/email/_header_value_parser.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,12 @@ class MessageID(MsgID):
878878
class InvalidMessageID(MessageID):
879879
token_type = 'invalid-message-id'
880880

881+
class MessageIDList(TokenList):
    """Token list for a header value that holds a sequence of msg-ids."""

    token_type = 'message-id-list'

    @property
    def message_ids(self):
        """Return the child tokens whose token_type is 'msg-id', in order.

        Children with any other token_type (for example
        'invalid-message-id') are left out of the result.
        """
        return [child for child in self if child.token_type == 'msg-id']
881887

882888
class Header(TokenList):
883889
token_type = 'header'
@@ -2175,6 +2181,32 @@ def parse_message_id(value):
21752181

21762182
return message_id
21772183

2184+
def parse_message_ids(value):
    """in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
       references      =   "References:" 1*msg-id CRLF

    Parse value into a MessageIDList and return it.

    Tokens are consumed from the front of value with get_msg_id() until
    the string is exhausted.  A comma between ids is recorded as an
    InvalidHeaderDefect and replaced in the tree by a single-space
    WhiteSpaceTerminal.  If get_msg_id() raises HeaderParseError, the
    remaining text is parsed with get_unstructured(), wrapped in an
    InvalidMessageID, a defect is recorded, and parsing stops.

    An empty value yields an empty list with no defects.
    """
    id_list = MessageIDList()
    remaining = value
    while remaining:
        if remaining[0] == ',':
            # message id list separated with commas - this is invalid,
            # but happens rather frequently in the wild
            comma_defect = errors.InvalidHeaderDefect("comma in msg-id list")
            id_list.defects.append(comma_defect)
            id_list.append(
                WhiteSpaceTerminal(' ', 'invalid-comma-replacement'))
            remaining = remaining[1:]
            continue
        try:
            msg_id, remaining = get_msg_id(remaining)
        except errors.HeaderParseError as ex:
            # Nothing after this point can be trusted as a msg-id, so keep
            # the rest of the text as one invalid token and stop.
            leftover = get_unstructured(remaining)
            id_list.append(InvalidMessageID(leftover))
            id_list.defects.append(
                errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
            break
        id_list.append(msg_id)
    return id_list
2209+
21782210
#
21792211
# XXX: As I begin to add additional header parsers, I'm realizing we probably
21802212
# have two levels of parser routines: the get_XXX methods that get a token in

Lib/email/headerregistry.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,18 @@ def parse(cls, value, kwds):
534534
kwds['defects'].extend(parse_tree.all_defects)
535535

536536

537+
class ReferencesHeader:
    """Header type whose value is parsed as a list of message ids."""

    # NOTE(review): presumably caps how many times the header may appear in
    # a message; the code that reads max_count is not shown here - confirm.
    max_count = 1
    value_parser = staticmethod(parser.parse_message_ids)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and store the results in the kwds mapping.

        Sets kwds['parse_tree'] to the tree returned by value_parser and
        kwds['decoded'] to its str() form, then extends kwds['defects']
        with the tree's all_defects.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
547+
548+
537549
# The header factory #
538550

539551
_default_header_map = {
@@ -557,6 +569,8 @@ def parse(cls, value, kwds):
557569
'content-disposition': ContentDispositionHeader,
558570
'content-transfer-encoding': ContentTransferEncodingHeader,
559571
'message-id': MessageIDHeader,
572+
'in-reply-to': ReferencesHeader,
573+
'references': ReferencesHeader,
560574
}
561575

562576
class HeaderRegistry:

Lib/test/test_email/test__header_value_parser.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2867,6 +2867,81 @@ def test_get_msg_id_ws_only_local(self):
28672867
)
28682868
self.assertEqual(msg_id.token_type, 'msg-id')
28692869

2870+
def test_parse_message_ids_valid(self):
    # Two space-separated ids: the same text is passed as the input and as
    # both expected strings, with an empty expected-defects list.
    text = "<foo@bar> <bar@foo>"
    parsed = self._test_parse_x(
        parser.parse_message_ids, text, text, text, [])
    self.assertEqual(parsed.token_type, 'message-id-list')
2879+
2880+
def test_parse_message_ids_empty(self):
    # A value holding only a single space is expected to produce one
    # InvalidHeaderDefect while keeping the text unchanged.
    blank = " "
    expected_defects = [errors.InvalidHeaderDefect]
    parsed = self._test_parse_x(
        parser.parse_message_ids, blank, blank, blank, expected_defects)
    self.assertEqual(parsed.token_type, 'message-id-list')
2889+
2890+
def test_parse_message_ids_comment(self):
    # An id followed by a parenthesised comment: the second expected
    # string drops the comment text but keeps the space before it.
    with_comment = "<foo@bar> (foo's message from \"bar\")"
    without_comment = "<foo@bar> "
    parsed = self._test_parse_x(
        parser.parse_message_ids,
        with_comment,
        with_comment,
        without_comment,
        [],
    )
    ids = parsed.message_ids
    self.assertEqual(ids[0].value, '<foo@bar> ')
    self.assertEqual(parsed.token_type, 'message-id-list')
2900+
2901+
def test_parse_message_ids_no_sep(self):
    # Two ids written back to back with nothing between them are expected
    # to parse into two separate msg-id tokens without defects.
    text = "<foo@bar><bar@foo>"
    parsed = self._test_parse_x(
        parser.parse_message_ids, text, text, text, [])
    ids = parsed.message_ids
    self.assertEqual(ids[0].value, '<foo@bar>')
    self.assertEqual(ids[1].value, '<bar@foo>')
    self.assertEqual(parsed.token_type, 'message-id-list')
2912+
2913+
def test_parse_message_ids_comma_sep(self):
    # A comma between ids is invalid: one InvalidHeaderDefect is expected
    # and both expected strings show a space where the comma was.
    comma_separated = "<foo@bar>,<bar@foo>"
    space_separated = "<foo@bar> <bar@foo>"
    parsed = self._test_parse_x(
        parser.parse_message_ids,
        comma_separated,
        space_separated,
        space_separated,
        [errors.InvalidHeaderDefect],
    )
    ids = parsed.message_ids
    self.assertEqual(ids[0].value, '<foo@bar>')
    self.assertEqual(ids[1].value, '<bar@foo>')
    self.assertEqual(parsed.token_type, 'message-id-list')
2924+
2925+
def test_parse_message_ids_invalid_id(self):
    # Angle brackets around text that is not a msg-id: the text is kept
    # unchanged and two InvalidHeaderDefect entries are expected.
    bogus = "<Date: Wed, 08 Jun 2002 09:78:58 +0600>"
    expected_defects = [
        errors.InvalidHeaderDefect,
        errors.InvalidHeaderDefect,
    ]
    parsed = self._test_parse_x(
        parser.parse_message_ids, bogus, bogus, bogus, expected_defects)
    self.assertEqual(parsed.token_type, 'message-id-list')
2934+
2935+
def test_parse_message_ids_broken_ang(self):
    # A valid id followed by text with a stray '>' in front: the text is
    # kept unchanged and a single InvalidHeaderDefect is expected.
    text = "<foo@bar> >bar@foo"
    expected_defects = [errors.InvalidHeaderDefect]
    parsed = self._test_parse_x(
        parser.parse_message_ids, text, text, text, expected_defects)
    self.assertEqual(parsed.token_type, 'message-id-list')
2944+
28702945

28712946

28722947
@parameterize

Lib/test/test_email/test_headerregistry.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,5 +1821,18 @@ def test_message_id_header_is_not_folded(self):
18211821
h.fold(policy=policy.default.clone(max_line_length=20)),
18221822
'Message-ID:\n <ईमेलfromMessage@wők.com>\n')
18231823

1824+
def test_fold_references(self):
    # Each id is longer than the 20-character line limit; the expected
    # fold puts a line break between the ids and none inside either one.
    header_value = (
        '<referenceid1thatislongerthan@maxlinelength.com> '
        '<referenceid2thatislongerthan@maxlinelength.com>'
    )
    expected = (
        'References: '
        '<referenceid1thatislongerthan@maxlinelength.com>\n'
        ' <referenceid2thatislongerthan@maxlinelength.com>\n'
    )
    h = self.make_header('References', header_value)
    narrow_policy = policy.default.clone(max_line_length=20)
    self.assertEqual(h.fold(policy=narrow_policy), expected)
1835+
1836+
18241837
if __name__ == '__main__':
18251838
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add parsing for ``References`` and ``In-Reply-To`` headers to the :mod:`email`
2+
library that parses the header content as lists of message id tokens. This
3+
prevents them from being folded incorrectly.

0 commit comments

Comments
 (0)