Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit d791017

Browse files
authored
Merge pull request #638 from datafold/valentin-dx-842-add-column-types-to-json-output
Add column types to json output
2 parents 0d65830 + 80647b5 commit d791017

File tree

3 files changed

+192
-19
lines changed

3 files changed

+192
-19
lines changed

data_diff/dbt.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -308,13 +308,23 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
308308
)
309309
return
310310

311+
dataset1_columns = [
312+
(name, type_, table1.database.dialect.parse_type(table1.table_path, name, type_, *other))
313+
for (name, type_, *other) in table1_columns.values()
314+
]
315+
dataset2_columns = [
316+
(name, type_, table2.database.dialect.parse_type(table2.table_path, name, type_, *other))
317+
for (name, type_, *other) in table2_columns.values()
318+
]
311319
print(
312320
json.dumps(
313321
jsonify(
314322
diff,
315323
dbt_model=diff_vars.dbt_model,
324+
dataset1_columns=dataset1_columns,
325+
dataset2_columns=dataset2_columns,
316326
with_summary=True,
317-
with_columns={
327+
columns_diff={
318328
"added": columns_added,
319329
"removed": columns_removed,
320330
"changed": columns_type_changed,

data_diff/format.py

Lines changed: 77 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,23 @@
11
import collections
2-
from typing import Any, Optional, List, Dict, Tuple
2+
from enum import Enum
3+
from typing import Any, Optional, List, Dict, Tuple, Type
34

45
from runtype import dataclass
56
from data_diff.diff_tables import DiffResultWrapper
7+
from data_diff.sqeleton.abcs.database_types import (
8+
JSON,
9+
Boolean,
10+
ColType,
11+
Array,
12+
ColType_UUID,
13+
Date,
14+
FractionalType,
15+
NumericType,
16+
Struct,
17+
TemporalType,
18+
ColType_Alphanum,
19+
String_Alphanum,
20+
)
621

722

823
def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: str) -> "FailedDiff":
@@ -15,11 +30,16 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1530
).json()
1631

1732

33+
Columns = List[Tuple[str, str, ColType]]
34+
35+
1836
def jsonify(
1937
diff: DiffResultWrapper,
2038
dbt_model: str,
39+
dataset1_columns: Columns,
40+
dataset2_columns: Columns,
41+
columns_diff: Dict[str, List[str]],
2142
with_summary: bool = False,
22-
with_columns: Optional[Dict[str, List[str]]] = None,
2343
) -> "JsonDiff":
2444
"""
2545
Converts the diff result into a JSON-serializable format.
@@ -53,16 +73,13 @@ def jsonify(
5373
if with_summary:
5474
summary = _jsonify_diff_summary(diff.get_stats_dict(is_dbt=True))
5575

56-
columns = None
57-
if with_columns:
58-
columns = _jsonify_columns_diff(with_columns, list(key_columns))
76+
columns = _jsonify_columns_diff(dataset1_columns, dataset2_columns, columns_diff, list(key_columns))
5977

6078
is_different = bool(
6179
t1_exclusive_rows
6280
or t2_exclusive_rows
6381
or diff_rows
64-
or with_columns
65-
and (with_columns["added"] or with_columns["removed"] or with_columns["changed"])
82+
or (columns_diff["added"] or columns_diff["removed"] or columns_diff["changed"])
6683
)
6784
return JsonDiff(
6885
status="success",
@@ -138,8 +155,44 @@ class ExclusiveColumns:
138155
dataset2: List[str]
139156

140157

158+
class ColumnKind(Enum):
    """Coarse, database-independent category of a column.

    The string value of a member is what ends up in the ``kind`` field of a
    column entry in the JSON output.
    """

    # Numeric kinds.
    INTEGER = "integer"
    FLOAT = "float"

    # Textual kind.
    STRING = "string"

    # Temporal kinds.
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"

    # Logical kind.
    BOOL = "boolean"

    # Fallback for column types that have no dedicated kind.
    UNSUPPORTED = "unsupported"
167+
168+
169+
# Ordered lookup table from parsed column types to the kind reported in the
# JSON output. `_map_kind` walks this list top to bottom and returns the kind
# of the first entry that matches via isinstance(), so the ORDER of the
# entries is significant.
# NOTE(review): presumably the more specific types (Date, FractionalType) are
# listed before their more general counterparts (TemporalType, NumericType)
# for that reason -- confirm against the class hierarchy in database_types.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    # Boolean and temporal types.
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    # Numeric types.
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    # Types that are reported as plain strings.
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    # Generic entry; kept last so it only applies when nothing above matched.
    (ColType, ColumnKind.UNSUPPORTED),
]
183+
184+
185+
@dataclass
class Column:
    """One column of a dataset as listed in the JSON output."""

    # Column name.
    name: str
    # Column type string, passed through unchanged by the caller.
    type: str
    # String value of the ColumnKind the column was mapped to.
    kind: str
190+
191+
141192
@dataclass
142193
class JsonColumnsSummary:
194+
dataset1: List[Column]
195+
dataset2: List[Column]
143196
primaryKey: List[str]
144197
exclusive: ExclusiveColumns
145198
typeChanged: List[str]
@@ -179,7 +232,7 @@ class JsonDiff:
179232
summary: Optional[JsonDiffSummary]
180233
columns: Optional[JsonColumnsSummary]
181234

182-
version: str = "1.0.0"
235+
version: str = "1.1.0"
183236

184237

185238
def _group_rows(
@@ -262,12 +315,27 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
262315
)
263316

264317

265-
def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
318+
def _jsonify_columns_diff(
    dataset1_columns: Columns, dataset2_columns: Columns, columns_diff: Dict[str, List[str]], key_columns: List[str]
) -> JsonColumnsSummary:
    """Build the ``columns`` section of the JSON diff output.

    Args:
        dataset1_columns: ``(name, type, parsed type)`` triples for dataset 1.
        dataset2_columns: ``(name, type, parsed type)`` triples for dataset 2.
        columns_diff: Column names grouped under the keys ``"added"``,
            ``"removed"`` and ``"changed"``; a missing key is treated as an
            empty list.
        key_columns: Names of the primary key columns, stored as given.

    Returns:
        A ``JsonColumnsSummary`` in which ``"added"`` columns are reported as
        exclusive to dataset 2 and ``"removed"`` columns as exclusive to
        dataset 1.
    """

    def describe(columns: Columns) -> List[Column]:
        # Turn each triple into a Column, reducing the parsed type to its kind string.
        described = []
        for column_name, raw_type, parsed_type in columns:
            described.append(Column(name=column_name, type=raw_type, kind=_map_kind(parsed_type).value))
        return described

    first_dataset = describe(dataset1_columns)
    second_dataset = describe(dataset2_columns)

    only_in_dataset2 = list(columns_diff.get("added", []))
    only_in_dataset1 = list(columns_diff.get("removed", []))
    exclusive_columns = ExclusiveColumns(dataset2=only_in_dataset2, dataset1=only_in_dataset1)

    changed_types = list(columns_diff.get("changed", []))

    return JsonColumnsSummary(
        dataset1=first_dataset,
        dataset2=second_dataset,
        primaryKey=key_columns,
        exclusive=exclusive_columns,
        typeChanged=changed_types,
    )
335+
336+
337+
def _map_kind(kind: ColType) -> ColumnKind:
    """Return the ColumnKind for a parsed column type.

    ``KIND_MAPPING`` is scanned in order and the kind paired with the first
    type that ``kind`` is an instance of is returned. If no entry matches,
    the result is ``ColumnKind.UNSUPPORTED``.
    """
    matching_kinds = (json_kind for col_type, json_kind in KIND_MAPPING if isinstance(kind, col_type))
    # The generator is lazy, so scanning stops at the first match.
    return next(matching_kinds, ColumnKind.UNSUPPORTED)

tests/test_format.py

Lines changed: 104 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import unittest
22
from data_diff.diff_tables import DiffResultWrapper, InfoTree, SegmentInfo, TableSegment
33
from data_diff.format import jsonify
4+
from data_diff.sqeleton.abcs.database_types import Integer
45
from data_diff.sqeleton.databases import Database
56

67

@@ -35,11 +36,28 @@ def test_jsonify_diff(self):
3536
diff=[],
3637
stats={},
3738
)
38-
json_diff = jsonify(diff, dbt_model="my_model")
39+
json_diff = jsonify(
40+
diff,
41+
dbt_model="my_model",
42+
dataset1_columns=[
43+
("id", "NUMBER", Integer()),
44+
("value", "NUMBER", Integer()),
45+
],
46+
dataset2_columns=[
47+
("id", "NUMBER", Integer()),
48+
("value", "NUMBER", Integer()),
49+
],
50+
columns_diff={
51+
"added": [],
52+
"removed": [],
53+
"typeChanged": [],
54+
},
55+
)
56+
3957
self.assertEqual(
4058
json_diff,
4159
{
42-
"version": "1.0.0",
60+
"version": "1.1.0",
4361
"status": "success",
4462
"result": "different",
4563
"model": "my_model",
@@ -57,8 +75,23 @@ def test_jsonify_diff(self):
5775
},
5876
],
5977
},
78+
"columns": {
79+
"dataset1": [
80+
{"name": "id", "type": "NUMBER", "kind": "integer"},
81+
{"name": "value", "type": "NUMBER", "kind": "integer"},
82+
],
83+
"dataset2": [
84+
{"name": "id", "type": "NUMBER", "kind": "integer"},
85+
{"name": "value", "type": "NUMBER", "kind": "integer"},
86+
],
87+
"primaryKey": ["id"],
88+
"exclusive": {
89+
"dataset1": [],
90+
"dataset2": [],
91+
},
92+
"typeChanged": [],
93+
},
6094
"summary": None,
61-
"columns": None,
6295
},
6396
)
6497

@@ -86,11 +119,27 @@ def test_jsonify_diff_no_difeference(self):
86119
diff=[],
87120
stats={},
88121
)
89-
json_diff = jsonify(diff, dbt_model="model")
122+
json_diff = jsonify(
123+
diff,
124+
dbt_model="model",
125+
dataset1_columns=[
126+
("id", "NUMBER", Integer()),
127+
("value", "NUMBER", Integer()),
128+
],
129+
dataset2_columns=[
130+
("id", "NUMBER", Integer()),
131+
("value", "NUMBER", Integer()),
132+
],
133+
columns_diff={
134+
"added": [],
135+
"removed": [],
136+
"changed": [],
137+
},
138+
)
90139
self.assertEqual(
91140
json_diff,
92141
{
93-
"version": "1.0.0",
142+
"version": "1.1.0",
94143
"status": "success",
95144
"result": "identical",
96145
"model": "model",
@@ -100,8 +149,23 @@ def test_jsonify_diff_no_difeference(self):
100149
"exclusive": {"dataset1": [], "dataset2": []},
101150
"diff": [],
102151
},
152+
"columns": {
153+
"primaryKey": ["id"],
154+
"dataset1": [
155+
{"name": "id", "type": "NUMBER", "kind": "integer"},
156+
{"name": "value", "type": "NUMBER", "kind": "integer"},
157+
],
158+
"dataset2": [
159+
{"name": "id", "type": "NUMBER", "kind": "integer"},
160+
{"name": "value", "type": "NUMBER", "kind": "integer"},
161+
],
162+
"exclusive": {
163+
"dataset1": [],
164+
"dataset2": [],
165+
},
166+
"typeChanged": [],
167+
},
103168
"summary": None,
104-
"columns": None,
105169
},
106170
)
107171

@@ -133,11 +197,27 @@ def test_jsonify_column_suffix_fix(self):
133197
diff=[],
134198
stats={},
135199
)
136-
json_diff = jsonify(diff, dbt_model="my_model")
200+
json_diff = jsonify(
201+
diff,
202+
dbt_model="my_model",
203+
dataset1_columns=[
204+
("id_a", "NUMBER", Integer()),
205+
("value_b", "NUMBER", Integer()),
206+
],
207+
dataset2_columns=[
208+
("id_a", "NUMBER", Integer()),
209+
("value_b", "NUMBER", Integer()),
210+
],
211+
columns_diff={
212+
"added": [],
213+
"removed": [],
214+
"typeChanged": [],
215+
},
216+
)
137217
self.assertEqual(
138218
json_diff,
139219
{
140-
"version": "1.0.0",
220+
"version": "1.1.0",
141221
"status": "success",
142222
"result": "different",
143223
"model": "my_model",
@@ -158,6 +238,21 @@ def test_jsonify_column_suffix_fix(self):
158238
],
159239
},
160240
"summary": None,
161-
"columns": None,
241+
"columns": {
242+
"dataset1": [
243+
{"name": "id_a", "type": "NUMBER", "kind": "integer"},
244+
{"name": "value_b", "type": "NUMBER", "kind": "integer"},
245+
],
246+
"dataset2": [
247+
{"name": "id_a", "type": "NUMBER", "kind": "integer"},
248+
{"name": "value_b", "type": "NUMBER", "kind": "integer"},
249+
],
250+
"primaryKey": ["id_a"],
251+
"exclusive": {
252+
"dataset1": [],
253+
"dataset2": [],
254+
},
255+
"typeChanged": [],
256+
},
162257
},
163258
)

0 commit comments

Comments
 (0)