Skip to content

Commit 1c17d53

Browse files
committed
use next version of pyexcel-io
1 parent ca9e1bb commit 1c17d53

File tree

2 files changed

+5
-58
lines changed

2 files changed

+5
-58
lines changed

pyexcel_pdfr/pdfr.py

+4 -58
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,11 @@
66
:copyright: (c) 2015-2017 by Onni Software Ltd & its contributors
77
:license: New BSD License
88
"""
9-
import re
10-
import datetime
11-
129
from pdftables import get_tables
1310
from pyexcel_io.book import BookReader
1411
from pyexcel_io.sheet import SheetReader, NamedContent
1512
from pyexcel_io._compact import OrderedDict
13+
import pyexcel_io.service as service
1614

1715

1816
class PdfTable(SheetReader):
@@ -84,17 +82,17 @@ def column_iterator(self, row):
8482
def __convert_cell(self, cell_text):
8583
ret = None
8684
if self.__auto_detect_int:
87-
ret = _detect_int_value(cell_text)
85+
ret = service.detect_int_value(cell_text)
8886
if ret is None and self.__auto_detect_float:
89-
ret = _detect_float_value(cell_text)
87+
ret = service.detect_float_value(cell_text)
9088
shall_we_ignore_the_conversion = (
9189
(ret in [float('inf'), float('-inf')]) and
9290
self.__ignore_infinity
9391
)
9492
if shall_we_ignore_the_conversion:
9593
ret = None
9694
if ret is None and self.__auto_detect_datetime:
97-
ret = _detect_date_value(cell_text)
95+
ret = service.detect_date_value(cell_text)
9896
if ret is None:
9997
ret = cell_text
10098
return ret
@@ -142,55 +140,3 @@ def _parse_pdf(self, file_handle):
142140
def close(self):
143141
if self._file_handle:
144142
self._file_handle.close()
145-
146-
147-
def _detect_date_value(csv_cell_text):
148-
"""
149-
Read the date formats that were written by csv.writer
150-
"""
151-
ret = None
152-
try:
153-
if len(csv_cell_text) == 10:
154-
ret = datetime.datetime.strptime(
155-
csv_cell_text,
156-
"%Y-%m-%d")
157-
ret = ret.date()
158-
elif len(csv_cell_text) == 19:
159-
ret = datetime.datetime.strptime(
160-
csv_cell_text,
161-
"%Y-%m-%d %H:%M:%S")
162-
elif len(csv_cell_text) > 19:
163-
ret = datetime.datetime.strptime(
164-
csv_cell_text[0:26],
165-
"%Y-%m-%d %H:%M:%S.%f")
166-
except ValueError:
167-
pass
168-
return ret
169-
170-
171-
def _detect_float_value(csv_cell_text):
172-
try:
173-
should_we_skip_it = (csv_cell_text.startswith('0') and
174-
csv_cell_text.startswith('0.') is False)
175-
if should_we_skip_it:
176-
# do not convert if a number starts with 0
177-
# e.g. 014325
178-
return None
179-
else:
180-
return float(csv_cell_text)
181-
except ValueError:
182-
return None
183-
184-
185-
def _detect_int_value(csv_cell_text):
186-
if csv_cell_text.startswith('0') and len(csv_cell_text) > 1:
187-
return None
188-
try:
189-
return int(csv_cell_text)
190-
except ValueError:
191-
pattern = '([0-9]+,)*[0-9]+$'
192-
if re.match(pattern, csv_cell_text):
193-
integer_string = csv_cell_text.replace(',', '')
194-
return int(integer_string)
195-
else:
196-
return None

rnd_requirements.txt

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
https://github.com/pyexcel/pyexcel-io/archive/dev.zip

0 commit comments

Comments
 (0)