Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 49 additions & 29 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4114,6 +4114,16 @@ def test_read_zipfile_error(self):
with self.assertRaises(zipfile.BadZipfile):
zipfile.ZipFile(TESTFN, "r").close()

def test_read_zipfile_with_corrupted_extra_field(self):
with zipfile.ZipFile(TESTFN, mode='w') as zf:
zinfo = zipfile.ZipInfo("file.txt")
zinfo.extra = struct.pack('<HH', 1, 1)
zf.writestr(zinfo, b'_')

with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt extra field 0001'):
with zipfile.ZipFile(TESTFN) as zf:
pass

def test_read_after_write_unicode_filenames(self):
with zipfile.ZipFile(TESTFN2, 'w') as zipfp:
zipfp.writestr('приклад', b'sample')
Expand Down Expand Up @@ -5840,59 +5850,69 @@ class StripExtraTests(unittest.TestCase):

ZIP64_EXTRA = 1

strip_extra = staticmethod(zipfile._Extra.strip)

def test_no_data(self):
s = struct.Struct("<HH")
a = s.pack(self.ZIP64_EXTRA, 0)
b = s.pack(2, 0)
c = s.pack(3, 0)

self.assertEqual(b'', zipfile._Extra.strip(a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, zipfile._Extra.strip(b, (self.ZIP64_EXTRA,)))
self.assertEqual(
b+b"z", zipfile._Extra.strip(b+b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(b'', self.strip_extra(a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, self.strip_extra(b, (self.ZIP64_EXTRA,)))
self.assertEqual(b+b"z", self.strip_extra(b+b"z", (self.ZIP64_EXTRA,)))

self.assertEqual(b+c, zipfile._Extra.strip(a+b+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, zipfile._Extra.strip(b+a+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, zipfile._Extra.strip(b+c+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, self.strip_extra(a+b+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, self.strip_extra(b+a+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, self.strip_extra(b+c+a, (self.ZIP64_EXTRA,)))

def test_with_data(self):
s = struct.Struct("<HH")
a = s.pack(self.ZIP64_EXTRA, 1) + b"a"
b = s.pack(2, 2) + b"bb"
c = s.pack(3, 3) + b"ccc"

self.assertEqual(b"", zipfile._Extra.strip(a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, zipfile._Extra.strip(b, (self.ZIP64_EXTRA,)))
self.assertEqual(
b+b"z", zipfile._Extra.strip(b+b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(b"", self.strip_extra(a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, self.strip_extra(b, (self.ZIP64_EXTRA,)))
self.assertEqual(b+b"z", self.strip_extra(b+b"z", (self.ZIP64_EXTRA,)))

self.assertEqual(b+c, zipfile._Extra.strip(a+b+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, zipfile._Extra.strip(b+a+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, zipfile._Extra.strip(b+c+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, self.strip_extra(a+b+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, self.strip_extra(b+a+c, (self.ZIP64_EXTRA,)))
self.assertEqual(b+c, self.strip_extra(b+c+a, (self.ZIP64_EXTRA,)))

def test_multiples(self):
s = struct.Struct("<HH")
a = s.pack(self.ZIP64_EXTRA, 1) + b"a"
b = s.pack(2, 2) + b"bb"

self.assertEqual(b"", zipfile._Extra.strip(a+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b"", zipfile._Extra.strip(a+a+a, (self.ZIP64_EXTRA,)))
self.assertEqual(
b"z", zipfile._Extra.strip(a+a+b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(
b+b"z", zipfile._Extra.strip(a+a+b+b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(b"", self.strip_extra(a+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b"", self.strip_extra(a+a+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b"z", self.strip_extra(a+a+b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(b+b"z", self.strip_extra(a+a+b+b"z", (self.ZIP64_EXTRA,)))

self.assertEqual(b, zipfile._Extra.strip(a+a+b, (self.ZIP64_EXTRA,)))
self.assertEqual(b, zipfile._Extra.strip(a+b+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, zipfile._Extra.strip(b+a+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, self.strip_extra(a+a+b, (self.ZIP64_EXTRA,)))
self.assertEqual(b, self.strip_extra(a+b+a, (self.ZIP64_EXTRA,)))
self.assertEqual(b, self.strip_extra(b+a+a, (self.ZIP64_EXTRA,)))

def test_truncated_data_for_stripped_id(self):
s = struct.Struct("<HH")
a = s.pack(self.ZIP64_EXTRA, 3) + b"a"
b = s.pack(2, 2) + b"bb"

self.assertEqual(b, self.strip_extra(b+a, (self.ZIP64_EXTRA,)))

def test_truncated_data_for_nonstripped_id(self):
s = struct.Struct("<HH")
a = s.pack(self.ZIP64_EXTRA, 1) + b"a"
b = s.pack(2, 4) + b"bb"

self.assertEqual(b, self.strip_extra(a+b, (self.ZIP64_EXTRA,)))

def test_too_short(self):
self.assertEqual(b"", zipfile._Extra.strip(b"", (self.ZIP64_EXTRA,)))
self.assertEqual(b"z", zipfile._Extra.strip(b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(
b"zz", zipfile._Extra.strip(b"zz", (self.ZIP64_EXTRA,)))
self.assertEqual(
b"zzz", zipfile._Extra.strip(b"zzz", (self.ZIP64_EXTRA,)))
self.assertEqual(b"", self.strip_extra(b"", (self.ZIP64_EXTRA,)))
self.assertEqual(b"z", self.strip_extra(b"z", (self.ZIP64_EXTRA,)))
self.assertEqual(b"zz", self.strip_extra(b"zz", (self.ZIP64_EXTRA,)))
self.assertEqual(b"zzz", self.strip_extra(b"zzz", (self.ZIP64_EXTRA,)))


class StatIO(_pyio.BytesIO):
Expand Down
75 changes: 39 additions & 36 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,41 +195,50 @@ class LargeZipFile(Exception):
_DD_SIGNATURE = 0x08074b50


class _Extra(bytes):
class _Extra:
FIELD_STRUCT = struct.Struct('<HH')

def __new__(cls, val, id=None):
return super().__new__(cls, val)

def __init__(self, val, id=None):
self.id = id

@classmethod
def read_one(cls, raw):
try:
xid, xlen = cls.FIELD_STRUCT.unpack(raw[:4])
except struct.error:
xid = None
xlen = 0
return cls(raw[:4+xlen], xid), raw[4+xlen:]
def iter(cls, data, validate=False):
"""Iter through and yield each (field, id)."""
# early return for empty extra data
if not data:
return

@classmethod
def split(cls, data):
# use memoryview for zero-copy slices
rest = memoryview(data)
while rest:
extra, rest = _Extra.read_one(rest)
yield extra
pos, data_len = 0, len(data)
while pos < data_len:
try:
xid, xlen = cls.FIELD_STRUCT.unpack_from(data, pos)
except struct.error:
xid, xlen = None, 0
else:
if validate and pos + 4 + xlen > data_len:
raise BadZipFile(
"Corrupt extra field %04x (size=%d)" % (xid, xlen))
yield data[pos:pos + 4 + xlen], xid
pos += 4 + xlen

@classmethod
def strip(cls, data, xids):
"""Remove Extra fields with specified IDs."""
return b''.join(
ex
for ex in cls.split(data)
if ex.id not in xids
for ex, xid in cls.iter(data)
if xid not in xids
)

@classmethod
def update(cls, data, extra):
"""Insert fields from extra and strip duplicates."""
extras = {
xid: ex
for ex, xid in cls.iter(extra, True)
if xid is not None
}
# New fields first since data may have a corrupted tail that renders
# following fields inaccessible.
return b''.join(extras.values()) + cls.strip(data, extras)


def _check_zipfile(fp):
try:
Expand Down Expand Up @@ -585,14 +594,10 @@ def _encodeFilenameFlags(self):

def _decodeExtra(self, filename_crc):
# Try to decode the extra field.
extra = self.extra
unpack = struct.unpack
while len(extra) >= 4:
tp, ln = unpack('<HH', extra[:4])
if ln+4 > len(extra):
raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
for extra, tp in _Extra.iter(self.extra, True):
if tp == 0x0001:
data = extra[4:ln+4]
data = extra[4:]
# ZIP64 extension (large files and/or large archives)
try:
if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
Expand All @@ -610,7 +615,7 @@ def _decodeExtra(self, filename_crc):
raise BadZipFile(f"Corrupt zip64 extra field. "
f"{field} not found.") from None
elif tp == 0x7075:
data = extra[4:ln+4]
data = extra[4:]
# Unicode Path Extra Field
try:
up_version, up_name_crc = unpack('<BL', data[:5])
Expand All @@ -626,8 +631,6 @@ def _decodeExtra(self, filename_crc):
except UnicodeDecodeError as e:
raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e

extra = extra[ln+4:]

@classmethod
def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
"""Construct an appropriate ZipInfo for a file on the filesystem.
Expand Down Expand Up @@ -2665,11 +2668,11 @@ def _write_end_record(self):
extra_data = zinfo.extra
min_version = 0
if extra:
# Append a ZIP64 field to the extra's
extra_data = _Extra.strip(extra_data, (1,))
extra_data = struct.pack(
# Prepend a ZIP64 field to the extra's
extra_data = _Extra.update(extra_data, struct.pack(
'<HH' + 'Q'*len(extra),
1, 8*len(extra), *extra) + extra_data
1, 8*len(extra), *extra,
))

min_version = ZIP64_VERSION

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Rework :class:`!_Extra` in the :mod:`zipfile` module::

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_Extra is an internal class, so not relevant for users. These details can be removed from the news entry.

For just the refactoring we can skip the news entry. For the performance improvement, I would like to see a benchmark of functionality from the public api (e.g. not a benchmark of _Extra.strip, but of opening or unzipping a file)

* Improve performance for the mostly used :meth:`!_Extra.strip`.
* Replace :meth:`!_Extra.split` with :meth:`!_Extra.iter` for better semantic and performance.
* Introduce :meth:`!_Extra.update` for easier updating of subfields.
* Make it a namespace class rather than a subclass of :class:`bytes` for better performance.
Loading