Skip to content

Commit 36e62f1

Browse files
committed
Init with working Router and Streamer meta-utilities.
0 parents  commit 36e62f1

17 files changed

+530
-0
lines changed

.gitignore

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
5+
# Temporary text editor files
6+
*~
7+
8+
# C extensions
9+
*.so
10+
11+
# Distribution / packaging
12+
.Python
13+
env/
14+
bin/
15+
build/
16+
develop-eggs/
17+
dist/
18+
eggs/
19+
#lib/
20+
lib64/
21+
parts/
22+
sdist/
23+
var/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
28+
# Installer logs
29+
pip-log.txt
30+
pip-delete-this-directory.txt
31+
32+
# Unit test / coverage reports
33+
htmlcov/
34+
.tox/
35+
.coverage
36+
.cache
37+
nosetests.xml
38+
coverage.xml
39+
40+
# Sphinx documentation
41+
doc/_build/
42+
doc/.buildfile
43+
*.toctree

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2015 Aaron Halfaker
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include LICENSE README.md requirements.txt

README.md

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# MediaWiki command-line interface
2+
3+
A set of helper functions and classes for mediawiki-utilities command-line
4+
utilities.
5+
6+
* **Installation:** ``pip install mwcli``
7+
* **Documentation:** https://pythonhosted.org/mwcli
8+
* **Repository:** https://github.com/mediawiki-utilities/python-mwcli
9+
* **License:** MIT
10+
11+
## Author
12+
* Aaron Halfaker -- https://github.com/halfak

mwcli/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .streamer import Streamer, read_json
2+
from .router import Router
3+
4+
# ``__all__`` must hold attribute *names* (strings).  Listing the objects
# themselves makes ``from mwcli import *`` fail with a TypeError.
__all__ = ["Router", "Streamer", "read_json"]

mwcli/errors.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
class FileTypeError(Exception):
    """
    Raised when a file is not of an expected type.
    """

mwcli/files/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .functions import (concat, reader, writer, output_dir_path,
2+
normalize_path, normalize_dir)

mwcli/files/functions.py

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import bz2
2+
import gzip
3+
import io
4+
import os
5+
6+
from . import p7z
7+
from ..errors import FileTypeError
8+
9+
# Keys are file extensions without the leading dot.  Each value is a
# one-argument callable that takes a filename and returns a text-mode handle
# decoded as UTF-8; undecodable bytes are replaced instead of raising.
FILE_READERS = {
    'gz': lambda fn: gzip.open(fn, 'rt', encoding='utf-8', errors='replace'),
    'bz2': lambda fn: bz2.open(fn, 'rt', encoding='utf-8', errors='replace'),
    '7z': p7z.reader,  # delegates to the reader in the sibling p7z module
    'json': lambda fn: open(fn, 'rt', encoding='utf-8', errors='replace'),
    'xml': lambda fn: open(fn, 'rt', encoding='utf-8', errors='replace')
}
"""
Maps extensions to the strategy for opening/decompressing a file
"""
19+
20+
# Each value is a one-argument callable that takes a filename and returns a
# text-mode handle opened for writing with UTF-8 encoding; characters that
# cannot be encoded are replaced instead of raising.
# NOTE(review): writer() looks these keys up by file extension, so
# 'plaintext' only matches paths ending in ".plaintext" -- confirm intended.
FILE_WRITERS = {
    'gz': lambda fn: gzip.open(fn, 'wt', encoding='utf-8', errors='replace'),
    'bz2': lambda fn: bz2.open(fn, 'wt', encoding='utf-8', errors='replace'),
    'plaintext': lambda fn: open(fn, 'wt', encoding='utf-8', errors='replace'),
    'json': lambda fn: open(fn, 'wt', encoding='utf-8', errors='replace'),
    'xml': lambda fn: open(fn, 'wt', encoding='utf-8', errors='replace')
}
"""
Maps compression types to the strategy for opening/compressing a file
"""
30+
31+
32+
def extract_extension(path):
    """
    Splits the final component of a path into its name and its extension.

    :Parameters:
        path : str
            A filesystem path

    :Returns:
        A `(name, extension)` pair.  `extension` is the text after the last
        "." of the filename (dot not included) and `name` is everything
        before that dot.  When the filename contains no "." at all,
        `extension` is `None` and `name` is the whole filename.
    """
    basename = os.path.basename(path)

    # rpartition yields an empty separator when there is no "." to split on.
    stem, dot, suffix = basename.rpartition(".")
    if not dot:
        return basename, None

    return stem, suffix
47+
48+
49+
def normalize_path(path_or_f):
    """
    Checks that a path points at an existing file with a supported
    extension.  File-like objects are passed through untouched.

    :Parameters:
        path_or_f : `str` | `file`
            the path to a dump file or a file handle

    :Returns:
        `path_or_f` itself when it has a `read` attribute; otherwise the
        path after user-directory ("~") expansion.

    :Raises:
        IsADirectoryError : the path is a directory
        FileNotFoundError : no regular file exists at the path
        FileTypeError : the extension has no entry in `FILE_READERS`
    """
    # Anything with a `read` attribute is treated as an already-open file.
    if hasattr(path_or_f, "read"):
        return path_or_f

    expanded = os.path.expanduser(path_or_f)

    # Directories get their own error; any other non-file is "not found".
    if os.path.isdir(expanded):
        raise IsADirectoryError("Is a directory: {0}".format(expanded))
    if not os.path.isfile(expanded):
        raise FileNotFoundError("No such file: {0}".format(expanded))

    extension = extract_extension(expanded)[1]
    if extension not in FILE_READERS:
        raise FileTypeError("Extension {0} is not supported."
                            .format(repr(extension)))

    return expanded
79+
80+
81+
def normalize_dir(path):
    """
    Makes sure a directory exists at `path`, creating it (and any missing
    parent directories) when necessary.

    :Parameters:
        path : `str`
            the directory path to check or create

    :Returns:
        `path`, unchanged

    :Raises:
        NotADirectoryError : something that is not a directory already
                             exists at `path`
    """
    occupied_by_non_dir = os.path.exists(path) and not os.path.isdir(path)
    if occupied_by_non_dir:
        raise NotADirectoryError("Not a directory: {0}".format(path))

    # exist_ok makes this a no-op when the directory is already there.
    os.makedirs(path, exist_ok=True)
    return path
88+
89+
90+
def reader(path_or_f):
    """
    Turns a path to a (possibly compressed) file into a file-like object of
    (decompressed) data.  File-like objects are returned as they are.

    :Parameters:
        path_or_f : `str` | `file`
            the path to the dump file to read, or an already-open file

    :Returns:
        `path_or_f` itself when it has a `read` attribute; otherwise the
        handle produced by the `FILE_READERS` entry for the file's extension.
    """
    # Already-open handles need no further work.
    if hasattr(path_or_f, "read"):
        return path_or_f

    # normalize_path validates existence and extension before the lookup.
    checked_path = normalize_path(path_or_f)
    extension = extract_extension(checked_path)[1]
    return FILE_READERS[extension](checked_path)
110+
111+
112+
def output_dir_path(old_path, output_dir, compression):
    """
    Builds the path of an output file inside `output_dir`, reusing the
    filename of `old_path` with its last extension swapped for `compression`.

    :Parameters:
        old_path : `str`
            the path whose filename is reused
        output_dir : `str`
            the directory the new path is placed in
        compression : `str`
            the extension appended to the filename (no leading dot)
    """
    # Only the name half of the (name, extension) pair is needed.
    stem = extract_extension(old_path)[0]
    return os.path.join(output_dir, ".".join([stem, compression]))
116+
117+
118+
def writer(path):
    """
    Opens `path` for writing using the strategy that `FILE_WRITERS` maps to
    the path's extension.

    :Parameters:
        path : `str`
            the path of the file to write

    :Raises:
        RuntimeError : the extension has no entry in `FILE_WRITERS`
    """
    extension = extract_extension(path)[1]

    if extension not in FILE_WRITERS:
        raise RuntimeError("Output compression {0} not supported. Type {1}"
                           .format(extension, tuple(FILE_WRITERS.keys())))

    return FILE_WRITERS[extension](path)
130+
131+
132+
class ConcatinatingTextReader(io.TextIOBase):
    """
    A read-only text stream that presents several sources as a single one.

    Sources are consumed strictly in order.  A source is dropped from
    `self.items` once it has been read to its end.

    :Parameters:
        items : `str` | `file`
            the sources to read from; plain strings are wrapped in
            `io.StringIO`, anything else is used as given
    """

    def __init__(self, *items):
        super().__init__()
        self.items = [io.StringIO(i) if isinstance(i, str) else i
                      for i in items]

    def readable(self):
        """
        Reports that this stream supports reading.
        """
        return True

    def read(self, size=-1):
        """
        Reads up to `size` characters, crossing source boundaries as needed.

        :Parameters:
            size : `int` | `None`
                the maximum number of characters to return; `None` or a
                negative value reads everything that is left, `0` returns
                an empty string
        """
        return "".join(self._read(size))

    def readline(self, size=-1):
        """
        Reads one line.  A line that starts in one source and continues in
        the next is returned whole, and an exhausted source is skipped
        rather than reported as end-of-stream.

        :Parameters:
            size : `int` | `None`
                the maximum number of characters to return; `None` or a
                negative value means no limit

        :Returns:
            the line including its trailing newline if it has one, or ""
            once every source is exhausted
        """
        unlimited = size is None or size < 0
        remaining = size
        pieces = []

        while self.items and (unlimited or remaining > 0):
            source = self.items[0]
            if unlimited:
                piece = source.readline()
            else:
                piece = source.readline(remaining)

            if piece == "":
                # This source is finished; carry on with the next one
                # instead of signalling EOF for the whole stream.
                self.items.pop(0)
                continue

            pieces.append(piece)
            if piece.endswith("\n"):
                break
            if not unlimited:
                remaining -= len(piece)

        return "".join(pieces)

    def _read(self, size):
        # Generates the chunks that read() joins together.
        if size is None or size < 0:
            while self.items:
                yield self.items[0].read()
                self.items.pop(0)
            return

        remaining = size
        while remaining > 0 and self.items:
            chunk = self.items[0].read(remaining)
            yield chunk
            if len(chunk) < remaining:
                # A short read means this source has nothing left.
                self.items.pop(0)
            remaining -= len(chunk)
166+
167+
168+
def concat(*stream_items):
    r"""
    Performs a streaming concatenation of `str` or `file`.

    :Parameters:
        \*stream_items : `str` | `file`
            A list of items to concatenate together

    :Returns:
        A `ConcatinatingTextReader` over the items, in the order given
    """
    # The docstring is raw so that the reST-escaped "\*" above is not an
    # invalid string escape sequence.
    return ConcatinatingTextReader(*stream_items)

mwcli/files/p7z.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import io
2+
import os
3+
import subprocess
4+
5+
# Alias for the builtin ``open``.  ``reader`` below no longer needs it, but
# the name is kept so that existing references to it keep working.
file_open = open


def reader(path):
    """
    Turns a path to a dump file into a file-like object of (decompressed)
    XML data assuming that '7z' is installed and will know what to do.

    :Parameters:
        path : `str`
            the path to the dump file to read

    :Returns:
        A text stream over the standard output of the `7z` process, decoded
        as UTF-8 with undecodable bytes replaced.
    """
    process = subprocess.Popen(
        ['7z', 'e', '-so', path],
        stdout=subprocess.PIPE,
        # DEVNULL discards 7z's stderr without this function opening a
        # handle on os.devnull that it would then never close.
        stderr=subprocess.DEVNULL
    )
    return io.TextIOWrapper(process.stdout, encoding='utf-8',
                            errors='replace')

mwcli/files/tests/__init__.py

Whitespace-only changes.

mwcli/files/tests/foo.7z

99 Bytes
Binary file not shown.

mwcli/files/tests/test_functions.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import io
2+
import os
3+
4+
from nose.tools import eq_, raises
5+
6+
from ...errors import FileTypeError
7+
from ..functions import concat, extract_extension, normalize_path, reader
8+
9+
10+
def test_concat():
    # Plain strings mixed with a file-like object should read back as one
    # joined string.
    pieces = ("Foobar1", io.StringIO("Foobar2"), "Foobar3")
    combined = concat(*pieces).read()

    eq_(combined, "Foobar1Foobar2Foobar3")
16+
17+
18+
def test_extract_extension():
    # Only the extension half of the returned pair is checked here.
    expectations = [
        ("foo", None),
        ("foo.xml", "xml"),
        ("foo.xml.gz", "gz"),
        ("foo.xml.bz2", "bz2"),
        ("foo.xml-p10001p10200.7z", "7z"),
    ]
    for filename, expected in expectations:
        eq_(extract_extension(filename)[1], expected)
24+
25+
26+
@raises(FileNotFoundError)
def test_normalize_path_noexist():
    # A path at which no file exists must be rejected.
    missing = "IDONTEXIST!!!"
    normalize_path(missing)
29+
30+
31+
@raises(IsADirectoryError)
def test_normalize_path_directory():
    # The directory holding this test module stands in for "a directory".
    here = os.path.dirname(__file__)
    normalize_path(here)
34+
35+
36+
@raises(FileTypeError)
def test_normalize_path_bad_extension():
    # This module's own path is an existing file whose extension is not a
    # supported reader type.
    this_module = __file__
    normalize_path(this_module)
39+
40+
41+
def test_open():
    # An already-open handle should come back from reader() as is.
    handle = io.StringIO()
    result = reader(handle)

    eq_(handle, result)

mwcli/files/tests/test_p7z.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import os.path
2+
3+
from nose.tools import eq_
4+
5+
from ..p7z import reader
6+
7+
8+
def test_open_file():
    # foo.7z sits in the same directory as this test module.
    archive = os.path.join(os.path.dirname(__file__), "foo.7z")
    contents = reader(archive).read()

    eq_(contents, "foo\n")

0 commit comments

Comments
 (0)