From 8c164ba3f4a6173256e160744d763f9e0b61ad0c Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 26 Jun 2026 13:39:12 +0800 Subject: [PATCH 1/5] gh-152248: Reject a POSIX TZ abbreviation with non-ASCII-letter characters in pure-Python zoneinfo --- Lib/test/test_zoneinfo/test_zoneinfo.py | 35 +++++++++++++++++++ Lib/zoneinfo/_zoneinfo.py | 4 +-- ...-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst | 4 +++ 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 7502b120825fbce..3d375e1f50aec70 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1145,6 +1145,12 @@ def test_invalid_tzstr(self): "+11", # Unquoted alphanumeric "GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST "GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST + # Unquoted abbreviation with embedded or leading whitespace + # (accepted by the unmodified pure parser, rejected by the C + # implementation; both reject after the fix). + "AB C3", + " A B 3", + "AAA4BB B,J60/2,J300/2", # Embedded whitespace in DST "PST8PDT,M3.2.0/2", # Only one transition rule # Invalid offset hours "AAA168", @@ -1222,6 +1228,35 @@ def test_invalid_tzstr(self): with self.assertRaisesRegex(ValueError, tzstr_regex): self.zone_from_tzstr(invalid_tzstr) + def test_invalid_tzstr_non_ascii_abbr(self): + # A non-ASCII letter in an unquoted abbreviation is publicly reachable: + # from_file() UTF-8-decodes the footer, so b"AB\xc3\x80C3" decodes to + # "ABÀC3" and reaches the parser. The C implementation rejects it + # (Py_ISALPHA is ASCII-only); the unmodified pure parser accepted it. 
+ # + # This case is kept out of the shared invalid_tzstrs list: the C error + # message embeds the bytes repr, which a re.escape() of the decoded + # string would not match, so each implementation is checked against + # its own message. + tzstr = "ABÀC3" + footer = tzstr.encode("utf-8") + + def from_footer(): + zonefile = io.BytesIO(self._tzif_header) + zonefile.seek(0, 2) + zonefile.write(b"\x0A") + zonefile.write(footer) + zonefile.write(b"\x0A") + zonefile.seek(0) + return self.klass.from_file(zonefile, key=tzstr) + + if self.module is py_zoneinfo: + expected = re.escape(tzstr) + else: + expected = re.escape(repr(footer)) + with self.assertRaisesRegex(ValueError, expected): + from_footer() + @classmethod def _populate_test_cases(cls): # This method uses a somewhat unusual style in that it populates the diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py index 7063eb6a9025ac2..7e613d8303762dd 100644 --- a/Lib/zoneinfo/_zoneinfo.py +++ b/Lib/zoneinfo/_zoneinfo.py @@ -640,11 +640,11 @@ def _parse_tz_str(tz_str): parser_re = re.compile( r""" - (?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>) + (?P<std>[a-zA-Z]+|<[a-zA-Z0-9+-]+>) (?: (?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?) (?: - (?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>) + (?P<dst>[a-zA-Z]+|<[a-zA-Z0-9+-]+>) (?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)? )? # dst )? # stdoff diff --git a/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst b/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst new file mode 100644 index 000000000000000..72767d1b4649111 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst @@ -0,0 +1,4 @@ +Fix the pure-Python :mod:`zoneinfo` parser accepting an unquoted POSIX TZ +abbreviation that contains characters other than ASCII letters (for example an +embedded space), which the C implementation already rejects. Patch by +tonghuaroot. 
From 8a6bcd27ef0a9d7d02c3ee04ff2cfb6c9cc0af02 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 26 Jun 2026 13:42:25 +0800 Subject: [PATCH 2/5] Trim test comments --- Lib/test/test_zoneinfo/test_zoneinfo.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 3d375e1f50aec70..de0d2fee595b63a 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1146,8 +1146,6 @@ def test_invalid_tzstr(self): "GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST "GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST # Unquoted abbreviation with embedded or leading whitespace - # (accepted by the unmodified pure parser, rejected by the C - # implementation; both reject after the fix). "AB C3", " A B 3", "AAA4BB B,J60/2,J300/2", # Embedded whitespace in DST @@ -1229,15 +1227,9 @@ def test_invalid_tzstr(self): self.zone_from_tzstr(invalid_tzstr) def test_invalid_tzstr_non_ascii_abbr(self): - # A non-ASCII letter in an unquoted abbreviation is publicly reachable: - # from_file() UTF-8-decodes the footer, so b"AB\xc3\x80C3" decodes to - # "ABÀC3" and reaches the parser. The C implementation rejects it - # (Py_ISALPHA is ASCII-only); the unmodified pure parser accepted it. - # - # This case is kept out of the shared invalid_tzstrs list: the C error - # message embeds the bytes repr, which a re.escape() of the decoded - # string would not match, so each implementation is checked against - # its own message. + # A non-ASCII letter reaches the parser via from_file()'s UTF-8 decode. + # It can't use the shared invalid_tzstrs list (encode("ascii") fails and + # the C message holds the bytes repr), so check each parser's message. 
tzstr = "ABÀC3" footer = tzstr.encode("utf-8") From 6dd23ffde76eb77d6c92ef3caa0ab00994f8f867 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 26 Jun 2026 13:43:55 +0800 Subject: [PATCH 3/5] Wrap test comment to 79 columns --- Lib/test/test_zoneinfo/test_zoneinfo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index de0d2fee595b63a..05eecf17532cdc4 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1228,8 +1228,8 @@ def test_invalid_tzstr(self): def test_invalid_tzstr_non_ascii_abbr(self): # A non-ASCII letter reaches the parser via from_file()'s UTF-8 decode. - # It can't use the shared invalid_tzstrs list (encode("ascii") fails and - # the C message holds the bytes repr), so check each parser's message. + # It needs a separate test: it can't be ASCII-encoded for the shared + # invalid_tzstrs list, and the C error message holds the bytes repr. 
tzstr = "ABÀC3" footer = tzstr.encode("utf-8") From bf26921d2f5a9ddeff94f1cefda7b5e27033d39c Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 26 Jun 2026 18:34:19 +0800 Subject: [PATCH 4/5] Reuse zone_from_tzstr with an encoding parameter in the non-ASCII test --- Lib/test/test_zoneinfo/test_zoneinfo.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 05eecf17532cdc4..a7324ac2a00a20a 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1009,14 +1009,14 @@ def populate_tzstr_header(cls): cls._tzif_header = bytes(out) - def zone_from_tzstr(self, tzstr): + def zone_from_tzstr(self, tzstr, encoding="ascii"): """Creates a zoneinfo file following a POSIX rule.""" zonefile = io.BytesIO(self._tzif_header) zonefile.seek(0, 2) # Write the footer zonefile.write(b"\x0A") - zonefile.write(tzstr.encode("ascii")) + zonefile.write(tzstr.encode(encoding)) zonefile.write(b"\x0A") zonefile.seek(0) @@ -1231,23 +1231,12 @@ def test_invalid_tzstr_non_ascii_abbr(self): # It needs a separate test: it can't be ASCII-encoded for the shared # invalid_tzstrs list, and the C error message holds the bytes repr. 
tzstr = "ABÀC3" - footer = tzstr.encode("utf-8") - - def from_footer(): - zonefile = io.BytesIO(self._tzif_header) - zonefile.seek(0, 2) - zonefile.write(b"\x0A") - zonefile.write(footer) - zonefile.write(b"\x0A") - zonefile.seek(0) - return self.klass.from_file(zonefile, key=tzstr) - if self.module is py_zoneinfo: expected = re.escape(tzstr) else: - expected = re.escape(repr(footer)) + expected = re.escape(repr(tzstr.encode("utf-8"))) with self.assertRaisesRegex(ValueError, expected): - from_footer() + self.zone_from_tzstr(tzstr, encoding="utf-8") @classmethod def _populate_test_cases(cls): From 1931021bd2f0e91bfbe4d4a4e26f7ee61c894003 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Fri, 26 Jun 2026 21:15:28 +0800 Subject: [PATCH 5/5] Reject an empty quoted abbreviation in the C zoneinfo parser --- Lib/test/test_zoneinfo/test_zoneinfo.py | 3 +++ .../Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst | 7 +++---- Modules/_zoneinfo.c | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index a7324ac2a00a20a..aba317e9a1d716d 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1149,6 +1149,9 @@ def test_invalid_tzstr(self): "AB C3", " A B 3", "AAA4BB B,J60/2,J300/2", # Embedded whitespace in DST + # Empty quoted abbreviation + "<>5", + "AAA4<>,M3.2.0/2,M11.1.0/3", "PST8PDT,M3.2.0/2", # Only one transition rule # Invalid offset hours "AAA168", diff --git a/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst b/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst index 72767d1b4649111..e6500e2631acb7a 100644 --- a/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst +++ b/Misc/NEWS.d/next/Library/2026-06-26-13-39-11.gh-issue-152248.N2Rmaf.rst @@ -1,4 +1,3 @@ -Fix the pure-Python :mod:`zoneinfo` parser accepting an unquoted POSIX TZ -abbreviation that contains 
characters other than ASCII letters (for example an -embedded space), which the C implementation already rejects. Patch by -tonghuaroot. +Make the C and pure-Python :mod:`zoneinfo` parsers validate POSIX TZ +abbreviations consistently, rejecting unquoted ones with non-ASCII-letter +characters and empty quoted abbreviations (``<>``). Patch by tonghuaroot. diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index eaffd020ed97c09..2a7ac4498261e08 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -1762,6 +1762,9 @@ parse_abbr(const char **p, PyObject **abbr) ptr++; } str_end = ptr; + if (str_end == str_start) { + return -1; + } ptr++; } else {