Skip to content

Commit 3d91f5a

Browse files
committed
LocalFileHeader should be able to write zip64 extras
Fixes #5
1 parent f6b69e2 commit 3d91f5a

File tree

3 files changed

+143
-10
lines changed

3 files changed

+143
-10
lines changed

fastzip/tests/types.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from dataclasses import asdict
33
from io import BytesIO
44

5-
from fastzip.types import LocalFileHeader
5+
from fastzip.types import CentralDirectoryHeader, LocalFileHeader
66

77

88
class LocalFileHeaderTest(unittest.TestCase):
@@ -24,3 +24,13 @@ def test_zip64(self) -> None:
2424
self.assertEqual(8_000_000_000, h2.usize)
2525
self.assertEqual(20, h.version_needed)
2626
self.assertEqual(45, h2.version_needed)
27+
28+
cdh = CentralDirectoryHeader.from_lfh_and_relative_offset(h2, 0)
29+
30+
self.assertEqual(8_000_000_000, cdh.usize)
31+
self.assertEqual(45, cdh.version_needed)
32+
33+
data = cdh.dump()
34+
cdh2, buf = CentralDirectoryHeader.read_from(BytesIO(data))
35+
self.assertEqual(8_000_000_000, cdh2.usize)
36+
self.assertEqual(45, cdh2.version_needed)

fastzip/tests/write.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,13 @@ def test_zip64_files(self) -> None:
8484
zf = zipfile.ZipFile(b)
8585
# TODO interrogate the zf to make sure it _was_ zip64
8686
self.assertEqual(20, len(zf.namelist()))
87+
88+
def _disabled_test_automatic_zip64(self) -> None:
89+
b = io.BytesIO()
90+
with WZip(Path("foo.zip"), fobj=b) as z:
91+
p = Path("big.bin")
92+
z.write(p, p, fobj=io.BytesIO(bytearray(2**32)))
93+
94+
zf = zipfile.ZipFile(b)
95+
info = zf.getinfo("big.bin")
96+
self.assertEqual(2**32, info.file_size)

fastzip/types.py

Lines changed: 122 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ def read_from(cls, fo: IO[bytes]) -> Tuple["LocalFileHeader", bytes]:
187187
int.from_bytes(data[n : n + 8], "little")
188188
for n in range(0, len(data), 8)
189189
]
190+
# If a non-zip64-aware compressor produced this with a file
191+
# whose uncompressed length was exactly UINT32_MAX, we
192+
# don't go down this code path because it won't include the
193+
# extra.
190194
if inst.usize == UINT32_MAX:
191195
inst.usize = sizes.pop(0)
192196
if inst.csize == UINT32_MAX:
@@ -223,6 +227,7 @@ def dump(self) -> Tuple[bytes, int]:
223227
fn = self.filename.encode("utf-8")
224228
flags |= FLAG_FILENAME_UTF8
225229

230+
# This modifies the extra of the original, but is idempotent.
226231
usize = self.usize
227232
csize = self.csize
228233
min_ver = self.version_needed
@@ -311,6 +316,14 @@ def from_lfh_and_relative_offset(
311316
filename=lfh.filename, # TODO ordering
312317
)
313318

319+
def replace_extra(self, num: int, value: bytes) -> None:
320+
n: List[Tuple[int, bytes]] = []
321+
for i, v in self.parsed_extra:
322+
if i != num:
323+
n.append((i, v))
324+
n.append((num, value))
325+
self.parsed_extra = n
326+
314327
# TODO not happy with the name
315328
def dump(self) -> bytes:
316329
flags = self.flags
@@ -322,36 +335,136 @@ def dump(self) -> bytes:
322335
except UnicodeEncodeError:
323336
fn = self.filename.encode("utf-8")
324337
flags |= FLAG_FILENAME_UTF8
325-
# TODO dump these too, they're important
326-
extra = b""
327-
comment = b""
338+
339+
# This modifies the extra of the original, but is idempotent.
340+
usize = self.usize
341+
csize = self.csize
342+
relative_offset_of_lfh = self.relative_offset_of_lfh
343+
min_ver = self.version_needed
344+
if (
345+
self.usize >= UINT32_MAX
346+
or self.csize >= UINT32_MAX
347+
or self.relative_offset_of_lfh >= UINT32_MAX
348+
):
349+
zip64_extra = struct.pack(
350+
"<QQQ", self.usize, self.csize, self.relative_offset_of_lfh
351+
)
352+
usize = UINT32_MAX
353+
csize = UINT32_MAX
354+
relative_offset_of_lfh = UINT32_MAX
355+
self.replace_extra(1, zip64_extra)
356+
min_ver = max(self.version_needed, ZIP64_VERSION)
357+
extra = b"".join(
358+
struct.pack("<HH", i[0], len(i[1])) + i[1] for i in self.parsed_extra
359+
)
360+
extra_length = len(extra)
361+
362+
comment = (self.file_comment or "").encode("utf-8")
363+
comment_length = len(comment)
364+
328365
return (
329366
struct.pack(
330367
CENTRAL_DIRECTORY_FORMAT,
331368
self.signature,
332369
self.version_made_by,
333-
self.version_needed,
370+
min_ver,
334371
flags,
335372
self.method,
336373
self.mtime,
337374
self.mdate,
338375
self.crc32,
339-
self.csize,
340-
self.usize,
376+
csize,
377+
usize,
341378
# TODO always recalculates filename length, I guess?
342379
len(fn),
343-
0, # TODO extra_length
344-
0, # TODO comment_length
380+
extra_length,
381+
comment_length,
345382
self.disk_start,
346383
self.internal_attributes,
347384
self.external_attributes,
348-
self.relative_offset_of_lfh,
385+
relative_offset_of_lfh,
349386
)
350387
+ fn
351388
+ extra
352389
+ comment
353390
)
354391

392+
@classmethod
393+
def read_from(cls, fo: IO[bytes]) -> Tuple["CentralDirectoryHeader", bytes]:
394+
"""
395+
This isn't currently necessary or part of the public api when streaming.
396+
397+
Only used for testing...
398+
"""
399+
buf = _readn(fo, struct.calcsize(CENTRAL_DIRECTORY_FORMAT))
400+
args = struct.unpack(CENTRAL_DIRECTORY_FORMAT, buf)
401+
inst = cls(*args)
402+
403+
if inst.signature != CENTRAL_DIRECTORY_SIGNATURE:
404+
raise ValueError("Invalid signature %0x" % (inst.signature,))
405+
406+
filename_data = _readn(fo, inst.filename_length)
407+
buf += filename_data
408+
409+
if inst.flags & FLAG_FILENAME_UTF8:
410+
inst.filename = filename_data.decode("utf-8") # can raise
411+
else:
412+
inst.filename = filename_data.decode("cp437")
413+
414+
if inst.flags & FLAG_DATA_DESCRIPTOR:
415+
# I am not a fan of the complexity and additional validation
416+
# required to support this flag; although Python's zipfile.py can
417+
# generate such files, I don't see the usefulness and would like to
418+
# guarantee that files output by this library will not contain them.
419+
raise NotImplementedError("Data descriptor")
420+
421+
if inst.extra_length:
422+
extra: List[Tuple[int, bytes]] = []
423+
extra_data = _readn(fo, inst.extra_length)
424+
# print(" ".join("%02x" % c for c in extra_data))
425+
426+
i = 0
427+
# The len() - 4 is to avoid `_slicen` needing to raise an exception
428+
# if there are 1-3 bytes left. We raise that exception ourselves
429+
# directly below the loop to make it more clear that it's leftover
430+
# data at the _end_ rather than one that is completely malformed.
431+
while i < len(extra_data) - 4:
432+
extra_id, data_size = struct.unpack(
433+
"<HH",
434+
_slicen(extra_data, i, 4),
435+
)
436+
# print("Extra", i, extra_id, data_size)
437+
i += 4
438+
data = _slicen(extra_data, i, data_size)
439+
i += data_size
440+
extra.append((extra_id, data))
441+
442+
if extra_id == 1: # zip64 entry
443+
sizes = [
444+
int.from_bytes(data[n : n + 8], "little")
445+
for n in range(0, len(data), 8)
446+
]
447+
# If a non-zip64-aware compressor produced this with a file
448+
# whose uncompressed length was exactly UINT32_MAX, we
449+
# don't go down this code path because it won't include the
450+
# extra.
451+
if inst.usize == UINT32_MAX:
452+
inst.usize = sizes.pop(0)
453+
if inst.csize == UINT32_MAX:
454+
inst.csize = sizes.pop(0)
455+
if inst.relative_offset_of_lfh == UINT32_MAX:
456+
inst.relative_offset_of_lfh = sizes.pop(0)
457+
# Can we be strict here?
458+
# if len(sizes) != 0:
459+
# raise ValueError("Extra zip64 extra in CDH")
460+
if i != len(extra_data):
461+
raise ValueError("Extra length")
462+
inst.parsed_extra = tuple(extra)
463+
buf += extra_data
464+
465+
return inst, buf
466+
return inst
467+
355468

356469
ZIP64_EOCD_FORMAT = "<LQHHLLQQQQ"
357470
ZIP64_EOCD_SIGNATURE = 0x06064B50

0 commit comments

Comments
 (0)