@@ -3490,11 +3490,12 @@ class ArchiveMaker:
3490
3490
with t.open() as tar:
3491
3491
... # `tar` is now a TarFile with 'filename' in it!
3492
3492
"""
3493
def __init__(self, **kwargs):
    """Set up an empty in-memory archive.

    Any keyword arguments are remembered as a private copy so they can
    be forwarded to ``tarfile.TarFile`` when the archive is opened for
    writing.
    """
    # Copy the options so the stored mapping is independent of the caller.
    self.tar_kwargs = {**kwargs}
    # In-memory buffer used as the archive's file object.
    self.bio = io.BytesIO()
3495
3496
3496
3497
def __enter__(self):
    """Open a write-mode ``TarFile`` over the in-memory buffer.

    The keyword arguments stored on the instance are passed through to
    the ``TarFile`` constructor. Returns ``self``.
    """
    fixed_options = {'mode': 'w', 'fileobj': self.bio}
    self.tar_w = tarfile.TarFile(**fixed_options, **self.tar_kwargs)
    return self
3499
3500
3500
3501
def __exit__ (self , * exc ):
@@ -4073,7 +4074,10 @@ def test_tar_filter(self):
4073
4074
# that in the test archive.)
4074
4075
with tarfile .TarFile .open (tarname ) as tar :
4075
4076
for tarinfo in tar .getmembers ():
4076
- filtered = tarfile .tar_filter (tarinfo , '' )
4077
+ try :
4078
+ filtered = tarfile .tar_filter (tarinfo , '' )
4079
+ except UnicodeEncodeError :
4080
+ continue
4077
4081
self .assertIs (filtered .name , tarinfo .name )
4078
4082
self .assertIs (filtered .type , tarinfo .type )
4079
4083
@@ -4084,11 +4088,48 @@ def test_data_filter(self):
4084
4088
for tarinfo in tar .getmembers ():
4085
4089
try :
4086
4090
filtered = tarfile .data_filter (tarinfo , '' )
4087
- except tarfile .FilterError :
4091
+ except ( tarfile .FilterError , UnicodeEncodeError ) :
4088
4092
continue
4089
4093
self .assertIs (filtered .name , tarinfo .name )
4090
4094
self .assertIs (filtered .type , tarinfo .type )
4091
4095
4096
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_filter_unencodable(self):
    # Exercise both built-in filters with the same inputs; subTest makes
    # a failure report which filter misbehaved.
    filters = (tarfile.tar_filter, tarfile.data_filter)

    # Sanity check using a valid path: the name must pass through as the
    # very same object.
    tarinfo = tarfile.TarInfo(os_helper.TESTFN)
    for filter_func in filters:
        with self.subTest(name=tarinfo.name, filter=filter_func.__name__):
            filtered = filter_func(tarinfo, '')
            self.assertIs(filtered.name, tarinfo.name)

    # A member name ending in an embedded NUL must be rejected with
    # ValueError.
    tarinfo = tarfile.TarInfo('test\x00')
    for filter_func in filters:
        with self.subTest(name=tarinfo.name, filter=filter_func.__name__):
            self.assertRaises(ValueError, filter_func, tarinfo, '')

    # A member name that is exactly one lone surrogate must be rejected
    # with UnicodeEncodeError.
    tarinfo = tarfile.TarInfo('\ud800')
    for filter_func in filters:
        with self.subTest(name=tarinfo.name, filter=filter_func.__name__):
            self.assertRaises(UnicodeEncodeError, filter_func, tarinfo, '')
4111
+
4112
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_extract_unencodable(self):
    # Create a member with name \xed\xa0\x80 which is UTF-8 encoded
    # lone surrogate \ud800.  With encoding='ascii' and
    # errors='surrogateescape', each '\udcXX' code point is written as
    # the raw byte 0xXX, so the literal must hold exactly these three
    # code points and nothing else.
    with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
        arc.add('\udced\udca0\udc80')
    with os_helper.temp_cwd():
        # errorlevel=1: the encoding failure must propagate to the caller
        # and nothing may be created in the working directory.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=1) as tar:
            self.assertEqual(tar.getnames(), ['\ud800'])
            with self.assertRaises(UnicodeEncodeError):
                tar.extractall()
        self.assertEqual(os.listdir(), [])

        # errorlevel=0 with debug=1: no exception escapes; the failure
        # is only reported on stderr and still nothing is created.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=0, debug=1) as tar:
            with support.captured_stderr() as stderr:
                tar.extractall()
        self.assertEqual(os.listdir(), [])
        self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
4132
+
4092
4133
def test_change_default_filter_on_instance (self ):
4093
4134
tar = tarfile .TarFile (tarname , 'r' )
4094
4135
def strict_filter (tarinfo , path ):
0 commit comments