Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions Lib/test/test_zoneinfo/test_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,14 +1009,14 @@ def populate_tzstr_header(cls):

cls._tzif_header = bytes(out)

def zone_from_tzstr(self, tzstr):
def zone_from_tzstr(self, tzstr, encoding="ascii"):
"""Creates a zoneinfo file following a POSIX rule."""
zonefile = io.BytesIO(self._tzif_header)
zonefile.seek(0, 2)

# Write the footer
zonefile.write(b"\x0A")
zonefile.write(tzstr.encode("ascii"))
zonefile.write(tzstr.encode(encoding))
zonefile.write(b"\x0A")

zonefile.seek(0)
Expand Down Expand Up @@ -1145,6 +1145,13 @@ def test_invalid_tzstr(self):
"+11", # Unquoted alphanumeric
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
# Unquoted abbreviation with embedded or leading whitespace
"AB C3",
" A B 3",
"AAA4BB B,J60/2,J300/2", # Embedded whitespace in DST
# Empty quoted abbreviation
"<>5",
"AAA4<>,M3.2.0/2,M11.1.0/3",
"PST8PDT,M3.2.0/2", # Only one transition rule
# Invalid offset hours
"AAA168",
Expand Down Expand Up @@ -1222,6 +1229,18 @@ def test_invalid_tzstr(self):
with self.assertRaisesRegex(ValueError, tzstr_regex):
self.zone_from_tzstr(invalid_tzstr)

def test_invalid_tzstr_non_ascii_abbr(self):
# Check that a TZ string whose unquoted abbreviation contains a non-ASCII
# letter ("À") is rejected with ValueError.
#
# A non-ASCII letter reaches the parser via from_file()'s UTF-8 decode.
# It needs a separate test: it can't be ASCII-encoded for the shared
# invalid_tzstrs list, and the C error message holds the bytes repr.
tzstr = "ABÀC3"
if self.module is py_zoneinfo:
# Pure-Python implementation: match the TZ string itself in the message.
expected = re.escape(tzstr)
else:
# Otherwise (the C implementation): match the repr of the UTF-8 bytes.
expected = re.escape(repr(tzstr.encode("utf-8")))
# Write the footer as UTF-8; zone_from_tzstr()'s default "ascii" encoding
# cannot represent "À".
with self.assertRaisesRegex(ValueError, expected):
self.zone_from_tzstr(tzstr, encoding="utf-8")

@classmethod
def _populate_test_cases(cls):
# This method uses a somewhat unusual style in that it populates the
Expand Down
4 changes: 2 additions & 2 deletions Lib/zoneinfo/_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,11 +640,11 @@ def _parse_tz_str(tz_str):

parser_re = re.compile(
r"""
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?P<std>[a-zA-Z]+|<[a-zA-Z0-9+-]+>)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I see another divergence: C accepts an empty <>. :'-(

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. The direction is the reverse of this PR though: here C is the lenient side. Its parse_abbr quoted branch has no empty check, while its own unquoted branch rejects an empty run (if (str_end == str_start) return -1;), so the pure parser is correct. Want me to fold a small C fix in here, or open a separate issue?

@StanFromIreland StanFromIreland Jun 26, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add it here; it's in the scope of POSIX TZ strings. This is actually spelled out by recent versions of the standard:

the quoting characters do not contribute to the three byte minimum length and {TZNAME_MAX} maximum length.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. The C parser now rejects an empty <>, mirroring its unquoted branch.

(?:
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
(?:
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?P<dst>[a-zA-Z]+|<[a-zA-Z0-9+-]+>)
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
)? # dst
)? # stdoff
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Make the C and pure-Python :mod:`zoneinfo` parsers validate POSIX TZ
abbreviations consistently, rejecting unquoted abbreviations with non-letter
characters and empty quoted abbreviations (``<>``). Patch by tonghuaroot.
3 changes: 3 additions & 0 deletions Modules/_zoneinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -1762,6 +1762,9 @@ parse_abbr(const char **p, PyObject **abbr)
ptr++;
}
str_end = ptr;
if (str_end == str_start) {
return -1;
}
ptr++;
}
else {
Expand Down
Loading