Skip to content

Commit 40eba6f

Browse files
authored
Merge pull request #24 from andersinno/skip_rows
Add option to leave out table contents completely
2 parents 25c3c73 + 6865f3a commit 40eba6f

File tree

7 files changed

+123
-6
lines changed

7 files changed

+123
-6
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ strategy:
8787
first_name: name.first_name
8888
last_name: name.last_name
8989
secret_key: string.empty
90+
access_log: skip_rows
9091
```
9192
9293
In the example configuration above, there are first listed two "addon
@@ -108,3 +109,9 @@ sanitation function consists of two parts separated from each other by
108109
a dot: the Python module name and the name of the actual function, which will
109110
be prefixed with `sanitize_`, so `name.first_name` would be a function
110111
called `sanitize_first_name` in a file called `name.py`.
112+
113+
Table content can be left out completely from the sanitized dump by
114+
setting the table's strategy to `skip_rows` (see the `access_log` table in the
115+
example config). This will leave out all `INSERT INTO` (MySQL) or `COPY`
116+
(PostgreSQL) statements from the sanitized dump file. `CREATE TABLE`
117+
statements will not be removed.

database_sanitizer/config.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
__all__ = ("Configuration", "ConfigurationError")
1111

12+
SKIP_ROWS_CONFIG_VALUE = "skip_rows"
1213
MYSQLDUMP_DEFAULT_PARAMETERS = ["--single-transaction"]
1314
PG_DUMP_DEFAULT_PARAMETERS = []
1415

@@ -26,6 +27,7 @@ class Configuration(object):
2627
"""
2728
def __init__(self):
2829
self.sanitizers = {}
30+
self.skip_rows_for_tables = []
2931
self.addon_packages = []
3032
self.mysqldump_params = []
3133
self.pg_dump_params = []
@@ -166,13 +168,18 @@ def load_sanitizers(self, config_data):
166168
if not isinstance(section_strategy, dict):
167169
if section_strategy is None:
168170
return
169-
raise ConfigurationError(
170-
"'strategy' is %s instead of dict" % (
171-
type(section_strategy),
172-
),
173-
)
171+
if section_strategy != SKIP_ROWS_CONFIG_VALUE:
172+
raise ConfigurationError(
173+
"'strategy' is %s instead of dict" % (
174+
type(section_strategy),
175+
),
176+
)
174177

175178
for table_name, column_data in six.iteritems(section_strategy):
179+
if column_data == SKIP_ROWS_CONFIG_VALUE:
180+
self.skip_rows_for_tables.append(table_name)
181+
continue
182+
176183
if not isinstance(column_data, dict):
177184
if column_data is None:
178185
continue

database_sanitizer/dump/mysql.py

+5
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ def sanitize_from_stream(stream, config):
103103
table_name = insert_into_match.group("table")
104104
column_names = parse_column_names(insert_into_match.group("columns"))
105105

106+
# Skip `INSERT INTO` statement if table rows are configured
107+
# to be skipped.
108+
if table_name in config.skip_rows_for_tables:
109+
continue
110+
106111
# Collect sanitizers possibly used for this table and place them into
107112
# a dictionary from which we can look them up by index later.
108113
sanitizers = {}

database_sanitizer/dump/postgres.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def sanitize(url, config):
5454
sanitize_value_line = None
5555
current_table = None
5656
current_table_columns = None
57+
skip_table = False
5758

5859
for line in codecs.getreader("utf-8")(process.stdout):
5960
# Eat the trailing new line.
@@ -65,7 +66,12 @@ def sanitize(url, config):
6566
if line == "\\.":
6667
current_table = None
6768
current_table_columns = None
68-
yield "\\."
69+
if not skip_table:
70+
yield "\\."
71+
skip_table = False
72+
continue
73+
74+
if skip_table:
6975
continue
7076

7177
if not sanitize_value_line:
@@ -84,6 +90,12 @@ def sanitize(url, config):
8490
current_table = copy_line_match.group("table")
8591
current_table_columns = parse_column_names(copy_line_match.group("columns"))
8692

93+
# Skip `COPY` statement if table rows are configured
94+
# to be skipped.
95+
if config and current_table in config.skip_rows_for_tables:
96+
skip_table = True
97+
continue
98+
8799
sanitize_value_line = get_value_line_sanitizer(
88100
config, current_table, current_table_columns)
89101

database_sanitizer/tests/test_config.py

+23
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,29 @@ def mock_find_sanitizer(*args):
137137
assert "table2.column1" in config.sanitizers
138138

139139

140+
def test_table_skip_rows_configuration():
141+
config = Configuration()
142+
143+
with pytest.raises(ConfigurationError):
144+
config.load_sanitizers({"strategy": "test"})
145+
146+
def mock_find_sanitizer(*args):
147+
return lambda value: value
148+
149+
with mock.patch("database_sanitizer.config.Configuration.find_sanitizer",
150+
side_effect=mock_find_sanitizer):
151+
152+
config.load_sanitizers({"strategy": {
153+
"table1": "skip_rows",
154+
"table2": {
155+
"column1": "test",
156+
}
157+
}})
158+
159+
assert "table2.column1" in config.sanitizers
160+
assert "table1" in config.skip_rows_for_tables
161+
162+
140163
def test_find_sanitizer():
141164
config = Configuration()
142165

database_sanitizer/tests/test_dump_mysql.py

+32
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@
2020
2121
DROP TABLE IF EXISTS `test`;
2222
23+
CREATE TABLE `test` (
24+
`id` int(11) NOT NULL AUTO_INCREMENT,
25+
`created_at` date NOT NULL,
26+
`notes` varchar(255) NOT NULL,
27+
PRIMARY KEY (`id`)
28+
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
29+
2330
INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES \
2431
(1,'2018-01-01','Test data 1'),\
2532
(2,'2018-01-02','Test data 2'),\
@@ -61,6 +68,31 @@ def test_sanitize_from_stream():
6168
""" in dump_output_lines
6269

6370

71+
def test_skip_table_rows():
72+
stream = io.BytesIO(MOCK_MYSQLDUMP_OUTPUT)
73+
config = Configuration()
74+
config.skip_rows_for_tables.append('test')
75+
76+
output = list(sanitize_from_stream(stream, config))
77+
78+
assert output == [
79+
'',
80+
'--- Fake MySQL database dump',
81+
'',
82+
'DROP TABLE IF EXISTS `test`;',
83+
'',
84+
'CREATE TABLE `test` (',
85+
'`id` int(11) NOT NULL AUTO_INCREMENT,',
86+
'`created_at` date NOT NULL,',
87+
'`notes` varchar(255) NOT NULL,',
88+
'PRIMARY KEY (`id`)',
89+
') ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;',
90+
'',
91+
'',
92+
'--- Final line after `INSERT INTO` statement.',
93+
]
94+
95+
6496
def test_sanitizer_invalid_input():
6597
stream = io.BytesIO(INVALID_MOCK_MYSQLDUMP_OUTPUT)
6698
config = Configuration()

database_sanitizer/tests/test_dump_postgres.py

+31
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
2020
COMMENT ON SCHEMA "public" IS 'standard public schema';
2121
22+
CREATE TABLE "public"."test" (
23+
"id" integer NOT NULL,
24+
"created_at" timestamp with time zone NOT NULL,
25+
"notes" character varying(255) NOT NULL
26+
);
27+
2228
COPY "public"."test" ("id", "created_at", "notes") FROM stdin;
2329
1\t2018-01-01 00:00:00\tTest data 1
2430
2\t2018-01-02 00:00:00\tTest data 2
@@ -65,6 +71,31 @@ def test_sanitize():
6571
assert "2\t2018-01-02 00:00:00\tSanitized" in dump_output_lines
6672

6773

74+
def test_skip_table_rows():
75+
url = urlparse.urlparse("postgres://localhost/test")
76+
config = Configuration()
77+
config.skip_rows_for_tables.append('test')
78+
79+
with mock.patch("subprocess.Popen",
80+
side_effect=create_mock_popen(MOCK_PG_DUMP_OUTPUT)):
81+
output = list(sanitize(url, config))
82+
83+
assert output == [
84+
'--- Fake PostgreSQL database dump',
85+
'',
86+
'COMMENT ON SCHEMA "public" IS \'standard public schema\';',
87+
'',
88+
'CREATE TABLE "public"."test" (',
89+
'"id" integer NOT NULL,',
90+
'"created_at" timestamp with time zone NOT NULL,',
91+
'"notes" character varying(255) NOT NULL',
92+
');',
93+
'',
94+
'',
95+
'--- Final line after `COPY` statement'
96+
]
97+
98+
6899
def test_sanitizer_invalid_input():
69100
url = urlparse.urlparse("postgres://localhost/test")
70101

0 commit comments

Comments
 (0)