From ce4bdf058d19a94b7fcf0a5346af1da37a0de7f1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 Jul 2026 12:05:50 +0300 Subject: [PATCH] gh-135661: Fix abrupt closing of empty comment in HTMLParser An abruptly closed empty comment ("<!-->" or "<!--->") no longer extends up to a later "-->" in the same feed() call. test_htmlparser now also feeds each string source as a single chunk, in addition to one character at a time, to exercise different input buffering. Co-Authored-By: Claude Opus 4.8 --- Lib/html/parser.py | 6 ++++-- Lib/test/test_htmlparser.py | 10 ++++++++++ .../2026-07-04-13-00-00.gh-issue-135661.CIkADG.rst | 3 +++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-07-04-13-00-00.gh-issue-135661.CIkADG.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 80fb8c3f929f6b6..38ddf9ef442d368 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -387,9 +387,11 @@ def parse_html_declaration(self, i): def parse_comment(self, i, report=True): rawdata = self.rawdata assert rawdata.startswith('" or "--!>" close. + match = commentabruptclose.match(rawdata, i+4) if not match: - match = commentabruptclose.match(rawdata, i+4) + match = commentclose.search(rawdata, i+4) if not match: return -1 if report: diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index e4eff1ea17a670b..6b7624f11505d92 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -116,6 +116,11 @@ def _run_check(self, source, expected_events, *, collector=None, convert_charrefs=False): if collector is None: collector = self.get_collector(convert_charrefs=convert_charrefs) + if isinstance(source, str): + # Also feed the whole string at once, not just character by + # character (below), to exercise different input buffering. 
+ self._run_check([source], expected_events, + convert_charrefs=convert_charrefs) parser = collector for s in source: parser.feed(s) @@ -593,6 +598,9 @@ def test_comments(self): ' -->' '' '' + # abruptly closed empty comment must not swallow later text + 'x-->' + 'y-->' ) expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), @@ -613,6 +621,8 @@ def test_comments(self): ('comment', ' '), ('comment', ''), + ('comment', ''), ('data', 'y-->'), ] self._run_check(html, expected) diff --git a/Misc/NEWS.d/next/Library/2026-07-04-13-00-00.gh-issue-135661.CIkADG.rst b/Misc/NEWS.d/next/Library/2026-07-04-13-00-00.gh-issue-135661.CIkADG.rst new file mode 100644 index 000000000000000..26a912a6f3a1949 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-07-04-13-00-00.gh-issue-135661.CIkADG.rst @@ -0,0 +1,3 @@ +Fix :class:`html.parser.HTMLParser`: an abruptly closed empty comment +(``<!-->`` or ``<!--->``) no longer extends up to a later ``-->`` in the same +:meth:`~html.parser.HTMLParser.feed` call.