|
6 | 6 | :copyright: (c) 2015-2017 by Onni Software Ltd & its contributors
|
7 | 7 | :license: New BSD License
|
8 | 8 | """
|
9 |
| -import re |
10 |
| -import datetime |
11 |
| - |
12 | 9 | from pdftables import get_tables
|
13 | 10 | from pyexcel_io.book import BookReader
|
14 | 11 | from pyexcel_io.sheet import SheetReader, NamedContent
|
15 | 12 | from pyexcel_io._compact import OrderedDict
|
| 13 | +import pyexcel_io.service as service |
16 | 14 |
|
17 | 15 |
|
18 | 16 | class PdfTable(SheetReader):
|
@@ -84,17 +82,17 @@ def column_iterator(self, row):
|
84 | 82 | def __convert_cell(self, cell_text):
|
85 | 83 | ret = None
|
86 | 84 | if self.__auto_detect_int:
|
87 |
| - ret = _detect_int_value(cell_text) |
| 85 | + ret = service.detect_int_value(cell_text) |
88 | 86 | if ret is None and self.__auto_detect_float:
|
89 |
| - ret = _detect_float_value(cell_text) |
| 87 | + ret = service.detect_float_value(cell_text) |
90 | 88 | shall_we_ignore_the_conversion = (
|
91 | 89 | (ret in [float('inf'), float('-inf')]) and
|
92 | 90 | self.__ignore_infinity
|
93 | 91 | )
|
94 | 92 | if shall_we_ignore_the_conversion:
|
95 | 93 | ret = None
|
96 | 94 | if ret is None and self.__auto_detect_datetime:
|
97 |
| - ret = _detect_date_value(cell_text) |
| 95 | + ret = service.detect_date_value(cell_text) |
98 | 96 | if ret is None:
|
99 | 97 | ret = cell_text
|
100 | 98 | return ret
|
@@ -142,55 +140,3 @@ def _parse_pdf(self, file_handle):
|
142 | 140 | def close(self):
|
143 | 141 | if self._file_handle:
|
144 | 142 | self._file_handle.close()
|
145 |
| - |
146 |
| - |
147 |
| -def _detect_date_value(csv_cell_text): |
148 |
| - """ |
149 |
| - Read the date formats that were written by csv.writer |
150 |
| - """ |
151 |
| - ret = None |
152 |
| - try: |
153 |
| - if len(csv_cell_text) == 10: |
154 |
| - ret = datetime.datetime.strptime( |
155 |
| - csv_cell_text, |
156 |
| - "%Y-%m-%d") |
157 |
| - ret = ret.date() |
158 |
| - elif len(csv_cell_text) == 19: |
159 |
| - ret = datetime.datetime.strptime( |
160 |
| - csv_cell_text, |
161 |
| - "%Y-%m-%d %H:%M:%S") |
162 |
| - elif len(csv_cell_text) > 19: |
163 |
| - ret = datetime.datetime.strptime( |
164 |
| - csv_cell_text[0:26], |
165 |
| - "%Y-%m-%d %H:%M:%S.%f") |
166 |
| - except ValueError: |
167 |
| - pass |
168 |
| - return ret |
169 |
| - |
170 |
| - |
171 |
| -def _detect_float_value(csv_cell_text): |
172 |
| - try: |
173 |
| - should_we_skip_it = (csv_cell_text.startswith('0') and |
174 |
| - csv_cell_text.startswith('0.') is False) |
175 |
| - if should_we_skip_it: |
176 |
| - # do not convert if a number starts with 0 |
177 |
| - # e.g. 014325 |
178 |
| - return None |
179 |
| - else: |
180 |
| - return float(csv_cell_text) |
181 |
| - except ValueError: |
182 |
| - return None |
183 |
| - |
184 |
| - |
185 |
| -def _detect_int_value(csv_cell_text): |
186 |
| - if csv_cell_text.startswith('0') and len(csv_cell_text) > 1: |
187 |
| - return None |
188 |
| - try: |
189 |
| - return int(csv_cell_text) |
190 |
| - except ValueError: |
191 |
| - pattern = '([0-9]+,)*[0-9]+$' |
192 |
| - if re.match(pattern, csv_cell_text): |
193 |
| - integer_string = csv_cell_text.replace(',', '') |
194 |
| - return int(integer_string) |
195 |
| - else: |
196 |
| - return None |
0 commit comments