18
18
"""
19
19
Common library to export either CSV or XLS.
20
20
"""
21
- import gc
22
21
import re
23
22
import logging
24
23
import numbers
27
26
28
27
import six
29
28
import tablib
30
- import openpyxl
29
+ import xlsxwriter
31
30
from django .http import HttpResponse , StreamingHttpResponse
32
31
from django .utils .encoding import smart_str
33
32
36
35
LOG = logging .getLogger ()
37
36
38
37
DOWNLOAD_CHUNK_SIZE = 1 * 1024 * 1024 # 1MB
39
- ILLEGAL_CHARS = r'[\000-\010]|[\013-\014]|[\016-\037]'
38
+
39
+ ILLEGAL_CHARS_RE = re .compile (r'[\000-\010]|[\013-\014]|[\016-\037]' )
40
+ HYPERLINK_RE = re .compile (r'^(https?://.+)' , re .IGNORECASE )
41
+
40
42
FORMAT_TO_CONTENT_TYPE = {
41
43
'csv' : 'application/csv' ,
42
44
'xls' : 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ,
43
45
'json' : 'application/json'
44
46
}
45
47
46
-
47
48
def nullify(cell):
    """Return ``cell`` unchanged, or the string ``"NULL"`` when it is ``None``.

    Only ``None`` is replaced; other falsy values (``0``, ``""``, ``False``)
    are returned as-is.
    """
    if cell is None:
        return "NULL"
    return cell
50
50
51
51
def file_reader(fh, chunk_size=None):
    """Generator that reads a file, chunk-by-chunk, and closes it when done.

    :param fh: file-like object exposing ``read(size)`` and ``close()``.
    :param chunk_size: maximum size passed to each ``read()`` call; defaults
        to ``DOWNLOAD_CHUNK_SIZE`` when omitted.
    :return: yields each non-empty chunk returned by ``fh.read()``.

    ``fh`` is closed once iteration has started and then ends for any reason:
    end of file, an exception raised by ``read()``, or the consumer closing
    the generator early.
    """
    if chunk_size is None:
        chunk_size = DOWNLOAD_CHUNK_SIZE

    try:
        while True:
            chunk = fh.read(chunk_size)
            if not chunk:
                break
            yield chunk
    finally:
        # Runs on normal exhaustion, on errors, and on GeneratorExit, so the
        # handle is not leaked when the download is aborted midway.
        fh.close()
59
59
60
60
61
61
def _excel_hyperlink(match):
    """Build an Excel ``=HYPERLINK("...")`` formula from a matched URL."""
    # Inside an Excel string literal a double quote is escaped by doubling it;
    # without this a URL containing '"' produces a malformed formula.
    return '=HYPERLINK("%s")' % match.group(1).replace('"', '""')


def encode_row(row, encoding=None, make_excel_links=False):
    """Return a cleaned-up, encoded copy of ``row`` as a list.

    For every cell:

    * string cells have the control characters matched by
      ``ILLEGAL_CHARS_RE`` replaced with ``'?'``;
    * when ``make_excel_links`` is true, string cells starting with
      ``http://`` or ``https://`` (case-insensitive) are turned into an
      ``=HYPERLINK("...")`` formula;
    * ``None`` becomes ``"NULL"`` (via ``nullify``);
    * every cell that is not a ``numbers.Number`` is passed through
      ``smart_str`` with ``strings_only=True`` and ``errors='replace'``.

    :param row: iterable of cell values.
    :param encoding: target encoding; falls back to
        ``i18n.get_site_encoding()`` when falsy.
    :param make_excel_links: convert URL-looking strings to Excel hyperlinks.
    :return: list with one processed value per input cell.
    """
    encoding = encoding or i18n.get_site_encoding()
    encoded_row = []

    for cell in row:
        if isinstance(cell, six.string_types):
            cell = ILLEGAL_CHARS_RE.sub('?', cell)
            if make_excel_links:
                cell = HYPERLINK_RE.sub(_excel_hyperlink, cell)
        cell = nullify(cell)
        if not isinstance(cell, numbers.Number):
            cell = smart_str(cell, encoding, strings_only=True, errors='replace')
        encoded_row.append(cell)
    return encoded_row
75
75
76
76
77
77
def dataset (headers , data , encoding = None ):
@@ -90,43 +90,32 @@ def dataset(headers, data, encoding=None):
90
90
91
91
return dataset
92
92
93
-
94
- class XlsWrapper (object ):
95
- def __init__ (self , xls ):
96
- self .xls = xls
97
-
98
-
99
- def xls_dataset (workbook ):
100
- output = string_io ()
101
- workbook .save (output )
102
- output .seek (0 )
103
- return XlsWrapper (output .read ())
104
-
105
-
106
93
def create_generator(content_generator, format, encoding=None):
    """Generate the export payload for ``content_generator``.

    ``content_generator`` is iterated as ``(headers, data)`` pairs.

    * ``'csv'``: yields one CSV fragment per pair, built with ``dataset``;
      headers are only passed along for the first pair.
    * ``'xls'``: writes all pairs into a single xlsxwriter workbook backed by
      an in-memory buffer and yields the buffer's contents exactly once.
      Headers are written only if they are present and no row has been
      written yet.

    :param content_generator: iterable of ``(headers, data)`` pairs.
    :param format: ``'csv'`` or ``'xls'``.
    :param encoding: forwarded to ``dataset`` / ``encode_row``.
    :raises ValueError: for any other ``format``. Because this is a generator,
        the error surfaces on the first iteration, not at call time.
    """
    if format == 'csv':
        show_headers = True
        for headers, data in content_generator:
            # Same result as the former ``show_headers and headers or None``:
            # falsy headers are normalised to None.
            csv_headers = (headers or None) if show_headers else None
            yield dataset(csv_headers, data, encoding).csv
            show_headers = False
    elif format == 'xls':
        output = string_io()
        # NOTE(review): with default options xlsxwriter writes strings that
        # start with '=' as formulas and URL-like strings as hyperlinks
        # (``strings_to_formulas`` / ``strings_to_urls``). Confirm this is
        # wanted for exported data.
        workbook = xlsxwriter.Workbook(output)
        worksheet = workbook.add_worksheet()
        row_ctr = 0

        for _headers, _data in content_generator:
            # Write headers to workbook once
            if _headers and row_ctr == 0:
                worksheet.write_row(row_ctr, 0, encode_row(_headers, encoding, make_excel_links=False))
                row_ctr += 1

            # Write row data to workbook
            for row in _data:
                worksheet.write_row(row_ctr, 0, encode_row(row, encoding, make_excel_links=False))
                row_ctr += 1

        # The file is only assembled into ``output`` when the workbook is closed.
        workbook.close()
        # getvalue() returns the whole buffer regardless of the stream
        # position, so no seek(0) is needed first.
        yield output.getvalue()
    else:
        raise ValueError("Unknown format: %s" % format)
132
121
@@ -147,7 +136,7 @@ def make_response(generator, format, name, encoding=None, user_agent=None): # T
147
136
pass
148
137
elif format == 'xls' :
149
138
format = 'xlsx'
150
- resp = HttpResponse ( next ( generator ) , content_type = content_type )
139
+ resp = StreamingHttpResponse ( generator , content_type = content_type )
151
140
elif format == 'json' or format == 'txt' :
152
141
resp = HttpResponse (generator , content_type = content_type )
153
142
else :
0 commit comments