@@ -187,6 +187,10 @@ def read_from(cls, fo: IO[bytes]) -> Tuple["LocalFileHeader", bytes]:
187
187
int .from_bytes (data [n : n + 8 ], "little" )
188
188
for n in range (0 , len (data ), 8 )
189
189
]
190
+ # If a non-zip64-aware compressor produced this with a file
191
+ # whose uncompressed length was exactly UINT32_MAX, we
192
+ # don't go down this code path because it won't include the
193
+ # extra.
190
194
if inst .usize == UINT32_MAX :
191
195
inst .usize = sizes .pop (0 )
192
196
if inst .csize == UINT32_MAX :
@@ -223,6 +227,7 @@ def dump(self) -> Tuple[bytes, int]:
223
227
fn = self .filename .encode ("utf-8" )
224
228
flags |= FLAG_FILENAME_UTF8
225
229
230
+ # This modifies the extra of the original, but is idempotent.
226
231
usize = self .usize
227
232
csize = self .csize
228
233
min_ver = self .version_needed
@@ -311,6 +316,14 @@ def from_lfh_and_relative_offset(
311
316
filename = lfh .filename , # TODO ordering
312
317
)
313
318
319
def replace_extra(self, num: int, value: bytes) -> None:
    """
    Set the extra field with id `num` to `value`.

    Every existing entry carrying that id is dropped, the remaining entries
    keep their relative order, and the new `(num, value)` pair is placed
    last.  `self.parsed_extra` is rebound to a fresh list; the previous
    container is not mutated.
    """
    kept: List[Tuple[int, bytes]] = [
        (extra_id, payload)
        for extra_id, payload in self.parsed_extra
        if extra_id != num
    ]
    self.parsed_extra = kept + [(num, value)]
326
+
314
327
# TODO not happy with the name
315
328
def dump (self ) -> bytes :
316
329
flags = self .flags
@@ -322,36 +335,136 @@ def dump(self) -> bytes:
322
335
except UnicodeEncodeError :
323
336
fn = self .filename .encode ("utf-8" )
324
337
flags |= FLAG_FILENAME_UTF8
325
- # TODO dump these too, they're important
326
- extra = b""
327
- comment = b""
338
+
339
+ # This modifies the extra of the original, but is idempotent.
340
+ usize = self .usize
341
+ csize = self .csize
342
+ relative_offset_of_lfh = self .relative_offset_of_lfh
343
+ min_ver = self .version_needed
344
+ if (
345
+ self .usize >= UINT32_MAX
346
+ or self .csize >= UINT32_MAX
347
+ or self .relative_offset_of_lfh >= UINT32_MAX
348
+ ):
349
+ zip64_extra = struct .pack (
350
+ "<QQQ" , self .usize , self .csize , self .relative_offset_of_lfh
351
+ )
352
+ usize = UINT32_MAX
353
+ csize = UINT32_MAX
354
+ relative_offset_of_lfh = UINT32_MAX
355
+ self .replace_extra (1 , zip64_extra )
356
+ min_ver = max (self .version_needed , ZIP64_VERSION )
357
+ extra = b"" .join (
358
+ struct .pack ("<HH" , i [0 ], len (i [1 ])) + i [1 ] for i in self .parsed_extra
359
+ )
360
+ extra_length = len (extra )
361
+
362
+ comment = (self .file_comment or "" ).encode ("utf-8" )
363
+ comment_length = len (comment )
364
+
328
365
return (
329
366
struct .pack (
330
367
CENTRAL_DIRECTORY_FORMAT ,
331
368
self .signature ,
332
369
self .version_made_by ,
333
- self . version_needed ,
370
+ min_ver ,
334
371
flags ,
335
372
self .method ,
336
373
self .mtime ,
337
374
self .mdate ,
338
375
self .crc32 ,
339
- self . csize ,
340
- self . usize ,
376
+ csize ,
377
+ usize ,
341
378
# TODO always recalculates filename length, I guess?
342
379
len (fn ),
343
- 0 , # TODO extra_length
344
- 0 , # TODO comment_length
380
+ extra_length ,
381
+ comment_length ,
345
382
self .disk_start ,
346
383
self .internal_attributes ,
347
384
self .external_attributes ,
348
- self . relative_offset_of_lfh ,
385
+ relative_offset_of_lfh ,
349
386
)
350
387
+ fn
351
388
+ extra
352
389
+ comment
353
390
)
354
391
392
@classmethod
def read_from(cls, fo: IO[bytes]) -> Tuple["CentralDirectoryHeader", bytes]:
    """
    Parse one central directory header from `fo`.

    This isn't currently necessary or part of the public api when streaming.

    Only used for testing...

    Reads the fixed-size record, then the filename, then (when
    `extra_length` is nonzero) the extra field.  A zip64 extra (id 1)
    supplies the real values for whichever of `usize`, `csize` and
    `relative_offset_of_lfh` were stored as UINT32_MAX.

    Returns the parsed header together with the raw bytes that were read
    for it (fixed record + filename + extra).

    Raises ValueError on a signature mismatch or when the extra entries do
    not add up to exactly `extra_length`, and NotImplementedError when the
    data descriptor flag is set.  Decoding a UTF-8 filename can raise too.

    NOTE(review): the file comment that may follow the extra field is not
    consumed here -- confirm no caller expects `fo` to be positioned at
    the next header afterwards.
    """
    buf = _readn(fo, struct.calcsize(CENTRAL_DIRECTORY_FORMAT))
    args = struct.unpack(CENTRAL_DIRECTORY_FORMAT, buf)
    inst = cls(*args)

    if inst.signature != CENTRAL_DIRECTORY_SIGNATURE:
        raise ValueError("Invalid signature %0x" % (inst.signature,))

    filename_data = _readn(fo, inst.filename_length)
    buf += filename_data

    if inst.flags & FLAG_FILENAME_UTF8:
        inst.filename = filename_data.decode("utf-8")  # can raise
    else:
        inst.filename = filename_data.decode("cp437")

    if inst.flags & FLAG_DATA_DESCRIPTOR:
        # I am not a fan of the complexity and additional validation
        # required to support this flag; although Python's zipfile.py can
        # generate such files, I don't see the usefulness and would like to
        # guarantee that files output by this library will not contain them.
        raise NotImplementedError("Data descriptor")

    if inst.extra_length:
        extra: List[Tuple[int, bytes]] = []
        extra_data = _readn(fo, inst.extra_length)

        i = 0
        # Only enter the loop while a full 4-byte (id, size) header is
        # left, so that 1-3 stray trailing bytes are reported by the check
        # below the loop as leftover data at the _end_ rather than as a
        # completely malformed entry.  The bound is inclusive: an entry
        # with an empty payload may sit in exactly the last 4 bytes.
        while i <= len(extra_data) - 4:
            extra_id, data_size = struct.unpack(
                "<HH",
                _slicen(extra_data, i, 4),
            )
            i += 4
            data = _slicen(extra_data, i, data_size)
            i += data_size
            extra.append((extra_id, data))

            if extra_id == 1:  # zip64 entry
                sizes = [
                    int.from_bytes(data[n : n + 8], "little")
                    for n in range(0, len(data), 8)
                ]
                # If a non-zip64-aware compressor produced this with a file
                # whose uncompressed length was exactly UINT32_MAX, we
                # don't go down this code path because it won't include the
                # extra.
                if inst.usize == UINT32_MAX:
                    inst.usize = sizes.pop(0)
                if inst.csize == UINT32_MAX:
                    inst.csize = sizes.pop(0)
                if inst.relative_offset_of_lfh == UINT32_MAX:
                    inst.relative_offset_of_lfh = sizes.pop(0)
                # TODO: can we be strict here and raise ValueError when
                # `sizes` still has unconsumed values?
        if i != len(extra_data):
            raise ValueError("Extra length")
        inst.parsed_extra = tuple(extra)
        buf += extra_data

    # Always hand back the (header, raw bytes) pair promised by the
    # annotation; the stray bare `return inst` that followed is gone.
    return inst, buf
467
+
355
468
356
469
# struct layout, little-endian: one 4-byte, one 8-byte, two 2-byte, two 4-byte
# and four 8-byte unsigned fields.  Presumably the fixed-size portion of the
# zip64 end of central directory record -- verify field order against the
# ZIP APPNOTE before relying on it.
ZIP64_EOCD_FORMAT = "<LQHHLLQQQQ"
357
470
# Going by the name, the signature of the zip64 end of central directory
# record; packed little-endian this value is the bytes b"PK\x06\x06".
ZIP64_EOCD_SIGNATURE = 0x06064B50
0 commit comments