update dependency

chfw · chfw · commit 1d8828c19316 · 2017-08-01T00:25:44.000+01:00
diff --git a/.moban.d/README.rst b/.moban.d/README.rst
@@ -1,8 +1,17 @@
-{% extends "BASIC-README.rst.jj2" %}
-
-{%block constraint%}
-{%endblock%}
+{% extends "README.rst.jj2" %}
 
 {%block features %}
-**{{name}}** does {{description}}.
+{%include "feature.rst"%}
 {%endblock%}
+
+{% block write_to_file %}
+{% endblock %}
+
+{% block write_to_memory %}
+{% endblock %}
+
+{% block pyexcel_write_to_file%}
+{% endblock %}
+
+{% block pyexcel_write_to_memory%}
+{% endblock %}
diff --git a/README.rst b/README.rst
@@ -1,5 +1,5 @@
 ================================================================================
-pyexcel-pdfr - Let you focus on data, instead of file formats
+pyexcel-pdfr - Let you focus on data, instead of pdf format
 ================================================================================
 
 .. image:: https://raw.githubusercontent.com/pyexcel/pyexcel.github.io/master/images/patreon.png
@@ -17,6 +17,30 @@ pyexcel-pdfr - Let you focus on data, instead of file formats
 .. image:: https://readthedocs.org/projects/pyexcel-pdfr/badge/?version=latest
    :target: http://pyexcel-pdfr.readthedocs.org/en/latest/
 
+
+Known constraints
+==================
+
+Fonts, colors and charts are not supported.
+
+Installation
+================================================================================
+
+You can install it via pip:
+
+.. code-block:: bash
+
+    $ pip install pyexcel-pdfr
+
+
+or clone it and install it:
+
+.. code-block:: bash
+
+    $ git clone https://github.com/pyexcel/pyexcel-pdfr.git
+    $ cd pyexcel-pdfr
+    $ python setup.py install
+
 Support the project
 ================================================================================
 
@@ -32,35 +56,183 @@ With your financial support, I will be able to invest
 a little bit more time in coding, documentation and writing interesting posts.
 
 
-
-Introduction
+Usage
 ================================================================================
-**pyexcel-pdfr** does Read tables in pdf files as tabular data.
 
+As a standalone library
+--------------------------------------------------------------------------------
 
+.. testcode::
+   :hide:
 
-Installation
-================================================================================
-You can install it via pip:
+    >>> import os
+    >>> import sys
+    >>> if sys.version_info[0] < 3:
+    ...     from StringIO import StringIO
+    ... else:
+    ...     from io import BytesIO as StringIO
+    >>> PY2 = sys.version_info[0] == 2
+    >>> if PY2 and sys.version_info[1] < 7:
+    ...      from ordereddict import OrderedDict
+    ... else:
+    ...     from collections import OrderedDict
 
-.. code-block:: bash
 
-    $ pip install pyexcel-pdfr
+Read from a pdf file
+********************************************************************************
 
+Here's the sample code:
 
-or clone it and install it:
+.. code-block:: python
 
-.. code-block:: bash
+    >>> from pyexcel_pdf import get_data
+    >>> data = get_data("your_file.pdf")
+    >>> import json
+    >>> print(json.dumps(data))
+    {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [["row 1", "row 2", "row 3"]]}
 
-    $ git clone https://github.com/pyexcel/pyexcel-pdfr.git
-    $ cd pyexcel-pdfr
-    $ python setup.py install
 
 
 
-Development guide
+Read from a pdf from memory
+********************************************************************************
+
+Continue from previous example:
+
+.. code-block:: python
+
+    >>> # This is just an illustration
+    >>> # In reality, you might deal with pdf file upload
+    >>> # where you will read from requests.FILES['YOUR_PDF_FILE']
+    >>> data = get_data(io)
+    >>> print(json.dumps(data))
+    {"Sheet 1": [[1, 2, 3], [4, 5, 6]], "Sheet 2": [[7, 8, 9], [10, 11, 12]]}
+
+
+Pagination feature
+********************************************************************************
+
+
+
+Let's assume the following file is a huge pdf file:
+
+.. code-block:: python
+
+   >>> huge_data = [
+   ...     [1, 21, 31],
+   ...     [2, 22, 32],
+   ...     [3, 23, 33],
+   ...     [4, 24, 34],
+   ...     [5, 25, 35],
+   ...     [6, 26, 36]
+   ... ]
+   >>> sheetx = {
+   ...     "huge": huge_data
+   ... }
+   >>> save_data("huge_file.pdf", sheetx)
+
+And let's pretend to read partial data:
+
+.. code-block:: python
+
+   >>> partial_data = get_data("huge_file.pdf", start_row=2, row_limit=3)
+   >>> print(json.dumps(partial_data))
+   {"huge": [[3, 23, 33], [4, 24, 34], [5, 25, 35]]}
+
+And you could as well do the same for columns:
+
+.. code-block:: python
+
+   >>> partial_data = get_data("huge_file.pdf", start_column=1, column_limit=2)
+   >>> print(json.dumps(partial_data))
+   {"huge": [[21, 31], [22, 32], [23, 33], [24, 34], [25, 35], [26, 36]]}
+
+Obviously, you could do both at the same time:
+
+.. code-block:: python
+
+   >>> partial_data = get_data("huge_file.pdf",
+   ...     start_row=2, row_limit=3,
+   ...     start_column=1, column_limit=2)
+   >>> print(json.dumps(partial_data))
+   {"huge": [[23, 33], [24, 34], [25, 35]]}
+
+.. testcode::
+   :hide:
+
+   >>> os.unlink("huge_file.pdf")
+
+
+As a pyexcel plugin
+--------------------------------------------------------------------------------
+
+Explicit import is no longer needed since pyexcel version 0.2.2. Instead,
+this library is auto-loaded. So if you want to read data in pdf format,
+installing it is enough.
+
+
+Reading from a pdf file
+********************************************************************************
+
+Here is the sample code:
+
+.. code-block:: python
+
+    >>> import pyexcel as pe
+    >>> sheet = pe.get_book(file_name="your_file.pdf")
+    >>> sheet
+    Sheet 1:
+    +---+---+---+
+    | 1 | 2 | 3 |
+    +---+---+---+
+    | 4 | 5 | 6 |
+    +---+---+---+
+    Sheet 2:
+    +-------+-------+-------+
+    | row 1 | row 2 | row 3 |
+    +-------+-------+-------+
+
+
+
+
+Reading from an IO instance
+********************************************************************************
+
+You have to wrap the binary content in a stream to get pdf working:
+
+.. code-block:: python
+
+    >>> # This is just an illustration
+    >>> # In reality, you might deal with pdf file upload
+    >>> # where you will read from requests.FILES['YOUR_PDF_FILE']
+    >>> pdffile = "another_file.pdf"
+    >>> with open(pdffile, "rb") as f:
+    ...     content = f.read()
+    ...     r = pe.get_book(file_type="pdf", file_content=content)
+    ...     print(r)
+    ...
+    Sheet 1:
+    +---+---+---+
+    | 1 | 2 | 3 |
+    +---+---+---+
+    | 4 | 5 | 6 |
+    +---+---+---+
+    Sheet 2:
+    +-------+-------+-------+
+    | row 1 | row 2 | row 3 |
+    +-------+-------+-------+
+
+
+
+
+License
 ================================================================================
 
+New BSD License
+
+Developer guide
+==================
+
 Development steps for code changes
 
 #. git clone https://github.com/pyexcel/pyexcel-pdfr.git
@@ -132,8 +304,9 @@ Acceptance criteria
 #. Agree on NEW BSD License for your contribution
 
 
+.. testcode::
+   :hide:
 
-License
-================================================================================
-
-New BSD License
+   >>> import os
+   >>> os.unlink("your_file.pdf")
+   >>> os.unlink("another_file.pdf")
diff --git a/pyexcel-pdfr.yml b/pyexcel-pdfr.yml
@@ -4,5 +4,7 @@ nick_name: "pdf"
 version: "0.0.1"
 current_version: "0.0.1"
 release: "0.0.1"
-dependencies: []
+file_type: "pdf"
+dependencies:
+  - pdftables
 description: "Read tables in pdf files as tabular data"
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+pdftables
diff --git a/setup.py b/setup.py
@@ -40,11 +40,13 @@
 ]
 
 INSTALL_REQUIRES = [
+    'pdftables',
 ]
 
 
 PACKAGES = find_packages(exclude=['ez_setup', 'examples', 'tests'])
-EXTRAS_REQUIRE = {}
+EXTRAS_REQUIRE = {
+}
 
 
 def read_files(*files):

Original file line number	Diff line number	Diff line change
`@@ -40,11 +40,13 @@`
`40`	`40`	`]`
`41`	`41`
`42`	`42`	`INSTALL_REQUIRES = [`
	`43`	`+ 'pdftables',`
`43`	`44`	`]`
`44`	`45`
`45`	`46`
`46`	`47`	`PACKAGES = find_packages(exclude=['ez_setup', 'examples', 'tests'])`
`47`		`-EXTRAS_REQUIRE = {}`
	`48`	`+EXTRAS_REQUIRE = {`
	`49`	`+}`
`48`	`50`
`49`	`51`
`50`	`52`	`def read_files(*files):`