Skip to content

Commit 0ef2f85

Browse files
committed
Rewrite the zip tree building algorithm
- Short version: it now handles the case where the entries in ``ZipFile.filelist`` are not in order.
- Long version:
  - Outer loop: iterate through each file in the list and break its full path into segments.
  - Inner loop: iterate through each segment; add a new node if the segment is not found among the child nodes of its parent, and update the node with the file's details if the segment is the last one. The full tree is complete at the end of one pass.
1 parent c4c3ff3 commit 0ef2f85

File tree

1 file changed

+116
-74
lines changed

1 file changed

+116
-74
lines changed

mfr/extensions/zip/render.py

Lines changed: 116 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import os
2-
from typing import List
3-
from zipfile import ZipFile
2+
from typing import List, Union
3+
from zipfile import ZipFile, ZipInfo
44

55
from mako.lookup import TemplateLookup
66

@@ -27,82 +27,108 @@ def render(self):
2727

2828
zip_file = ZipFile(self.file_path, 'r')
2929

30-
file_list = self.sanitize_file_list(zip_file.filelist)
31-
file_tree = self.file_list_to_tree(file_list)
30+
# ``ZipFile.filelist`` contains both files and folders. Use ``obj`` as the generic name for clarity.
31+
obj_list = self.sanitize_obj_list(zip_file.filelist)
32+
obj_tree = self.obj_list_to_tree(obj_list)
3233

33-
return self.TEMPLATE.render(data=file_tree, base=self.assets_url)
34+
return self.TEMPLATE.render(data=obj_tree, base=self.assets_url)
3435

35-
def file_list_to_tree(self, file_list: list) -> List[dict]:
36-
"""Build the file tree and return a "tree".
36+
def obj_list_to_tree(self, obj_list: list) -> List[dict]:
37+
"""Build the object tree from the object list. Each node is represented using a dictionary,
38+
where non-leaf nodes represent folders and leaves represent files. Return a list which
39+
contains only one element: the root node.
3740
38-
TODO: Fix this algorithm
39-
This algorithm only works when the ``file_list`` are in strict alphabetical order. Here is
40-
an example file A.zip where list 1 fails while list 2 succeed.
41-
42-
A.zip
43-
--- A/
44-
--- A/aa.png
45-
--- B/ab.png
46-
47-
File list 1: [ A/, A/B/, A/A/, A/A/aa.png, A/B/ab.png, ]
48-
49-
File list 2: [ A/, A/A/, A/A/aa.png, A/B/, A/B/ab.png, ]
50-
51-
:param file_list: the sanitized file list
41+
:param obj_list: the object list
5242
:rtype: ``List[dict]``
53-
:return: a "tree" in form of a list which contains one dictionary as the root node
43+
:return: a list which contains only one element: the root node.
5444
"""
5545

56-
icons_url = self.assets_url + '/img'
57-
58-
# Build the root of the file tree
59-
tree_root = [{
46+
# Build the root node of the tree
47+
tree_root = {
6048
'text': self.metadata.name + self.metadata.ext,
61-
'icon': icons_url + '/file-ext-zip.png',
49+
'icon': self.assets_url + '/img/file-ext-zip.png',
6250
'children': []
63-
}]
64-
65-
# Iteratively build the file tree for each file and folder.egments.
66-
for file in file_list:
67-
68-
node_path = tree_root[0]
69-
70-
# Split the full path into segments, add each path segment to the tree if the segment
71-
# doesn't already exist. The segments can be either a folder or a file.
72-
paths = [path for path in file.filename.split('/') if path]
73-
for path in paths:
74-
75-
# Add a child to the node
76-
if not len(node_path['children']) or node_path['children'][-1]['text'] != path:
77-
78-
new_node = {'text': path, 'children': []}
79-
80-
date = '%d-%02d-%02d %02d:%02d:%02d' % file.date_time[:6]
81-
size = sizeof_fmt(int(file.file_size)) if file.file_size else ''
82-
new_node['data'] = {'date': date, 'size': size}
51+
}
52+
53+
for obj in obj_list:
54+
55+
# For each object, always start from the root of the tree
56+
parent = tree_root
57+
path_from_root = obj.filename
58+
is_folder = path_from_root[-1] == '/'
59+
path_segments = [segment for segment in path_from_root.split('/') if segment]
60+
last_index = len(path_segments) - 1
61+
62+
# Iterate through the path segments list. Add the segment to tree if not already there
63+
# and update the details with the current object if it is the last one along the path.
64+
for index, segment in enumerate(path_segments):
65+
66+
# Check if the segment has already been added
67+
siblings = parent.get('children', [])
68+
current_node = self.find_node_among_siblings(segment, siblings)
69+
70+
# Found
71+
if current_node:
72+
if index == last_index:
73+
# If it is the last segment, this node must be a folder and represents the
74+
# current object. Update it with the object's info and break.
75+
assert is_folder
76+
self.update_node_with_attributes(current_node, obj, is_folder=is_folder)
77+
break
78+
# Otherwise, jump to the next segment with the current node as the new parent
79+
parent = current_node
80+
continue
81+
82+
# Not found
83+
new_node = {
84+
'text': segment,
85+
'children': [],
86+
}
87+
if index == last_index:
88+
# If it is the last segment, the node represents the current object. Update the
89+
# node with the object's info, add it to the siblings and break.
90+
self.update_node_with_attributes(new_node, obj, is_folder=is_folder)
91+
siblings.append(new_node)
92+
break
93+
94+
# Otherwise, append the new node to tree, jump to the next segment with the current
95+
# node as the new parent
96+
siblings.append(new_node)
97+
parent = new_node
98+
continue
8399

84-
if file.filename[-1] == '/':
85-
new_node['icon'] = icons_url + '/folder.png'
86-
else:
87-
ext = os.path.splitext(file.filename)[1].lstrip('.')
88-
if ext:
89-
ext = ext.lower()
90-
if self.icon_exists_for_type(ext):
91-
new_node['icon'] = '{}/file-ext-{}.png'.format(icons_url, ext)
92-
else:
93-
new_node['icon'] = '{}/file-ext-generic.png'.format(icons_url)
100+
return [tree_root, ]
94101

95-
node_path['children'].append(new_node)
102+
def update_node_with_attributes(self, node: dict, obj: ZipInfo, is_folder: bool) -> None:
103+
"""Update details (date, size, icon, etc.) of the node with the given object.
96104
97-
node_path = new_node
98-
# Go one level deeper
99-
else:
100-
node_path = node_path['children'][-1]
105+
:param node: the node to update
106+
:param obj: the object that the node represents
107+
:param is_folder: the folder flag
108+
"""
101109

102-
return tree_root
110+
date = '%d-%02d-%02d %02d:%02d:%02d' % obj.date_time[:6]
111+
size = sizeof_fmt(int(obj.file_size)) if obj.file_size else ''
112+
113+
if is_folder:
114+
icon_path = self.assets_url + '/img/folder.png'
115+
else:
116+
ext = (os.path.splitext(obj.filename)[1].lstrip('.')).lower()
117+
if self.icon_exists(ext):
118+
icon_path = '{}/img/file-ext-{}.png'.format(self.assets_url, ext)
119+
else:
120+
icon_path = '{}/img/file-ext-generic.png'.format(self.assets_url)
121+
122+
node.update({
123+
'icon': icon_path,
124+
'data': {
125+
'date': date,
126+
'size': size,
127+
},
128+
})
103129

104130
@staticmethod
105-
def icon_exists_for_type(ext: str) -> bool:
131+
def icon_exists(ext: str) -> bool:
106132
"""Check if an icon exists for the given file type. The extension string is converted to
107133
lower case.
108134
@@ -119,28 +145,44 @@ def icon_exists_for_type(ext: str) -> bool:
119145
))
120146

121147
@staticmethod
122-
def sanitize_file_list(file_list: list) -> list:
148+
def sanitize_obj_list(obj_list: list) -> list:
123149
"""Remove macOS system and temporary files. Current implementation only removes '__MACOSX/'
124150
and '.DS_Store'. If necessary, extend the sanitizer to exclude more file types.
125151
126-
:param file_list: the list of the path for each file and folder in the zip
152+
:param obj_list: a list of ``ZipInfo`` objects, one for each file and folder in the zip
127153
:rtype: ``list``
128154
:return: a sanitized list
129155
"""
130156

131-
sanitized_file_list = []
157+
sanitized_obj_list = []
132158

133-
for file in file_list:
159+
for obj in obj_list:
134160

135-
file_path = file.filename
161+
obj_path = obj.filename
136162
# Ignore macOS '__MACOSX' folder for zip file
137-
if file_path.startswith('__MACOSX/'):
163+
if obj_path.startswith('__MACOSX/'):
138164
continue
139-
140165
# Ignore macOS '.DS_Store' file
141-
if file_path == '.DS_Store' or file_path.endswith('/.DS_Store'):
166+
if obj_path == '.DS_Store' or obj_path.endswith('/.DS_Store'):
142167
continue
143168

144-
sanitized_file_list.append(file)
169+
sanitized_obj_list.append(obj)
170+
171+
return sanitized_obj_list
172+
173+
@staticmethod
174+
def find_node_among_siblings(segment: str, siblings: list) -> Union[dict, None]:
175+
"""Find if the folder or file represented by the path segment has already been added.
176+
177+
:param segment: the path segment
178+
:param siblings: the list containing all added sibling nodes
179+
:rtype: ``Union[dict, None]``
180+
:return: the node if found or ``None`` otherwise
181+
"""
182+
183+
for sibling in siblings:
184+
185+
if sibling.get('text', '') == segment:
186+
return sibling
145187

146-
return sanitized_file_list
188+
return None

0 commit comments

Comments
 (0)