# test_common.py
# built-in
from math import isclose
# external
import hypothesis
import pytest
# project
import textdistance
# Algorithm objects supplied as the `alg` argument of the parametrized
# tests in this module. Order is kept stable so generated test ids do not
# change.
ALGS = (
    textdistance.bag,
    textdistance.hamming,
    textdistance.levenshtein,
    textdistance.damerau_levenshtein,
    textdistance.jaro,
    textdistance.jaro_winkler,
    textdistance.mlipns,
    textdistance.lcsseq,
    textdistance.lcsstr,
    textdistance.ratcliff_obershelp,
    textdistance.jaccard,
    textdistance.sorensen,
    textdistance.tversky,
    textdistance.overlap,
    textdistance.cosine,
    textdistance.strcmp95,
    textdistance.monge_elkan,
    textdistance.mra,
    textdistance.prefix,
    textdistance.postfix,
    textdistance.identity,
    # NOTE(review): `length` is deliberately left out of the shared checks;
    # the reason is not recorded here -- confirm before re-enabling.
    # textdistance.length,

    # numpy-based:
    # NOTE(review): `gotoh` is likewise disabled; reason not recorded here.
    # textdistance.gotoh,
    textdistance.needleman_wunsch,
    textdistance.smith_waterman,
    textdistance.editex,
)
@pytest.mark.parametrize('alg', ALGS)
@hypothesis.given(
    left=hypothesis.strategies.text(),
    right=hypothesis.strategies.text(),
)
def test_normalization_range(left, right, alg):
    """Normalized distance and normalized similarity both lie in [0, 1]."""
    norm_distance = alg.normalized_distance(left, right)
    assert 0 <= norm_distance <= 1

    norm_similarity = alg.normalized_similarity(left, right)
    assert 0 <= norm_similarity <= 1
@pytest.mark.parametrize('alg', ALGS)
@hypothesis.given(
    left=hypothesis.strategies.text(),
    right=hypothesis.strategies.text(),
)
def test_normalization_by_one(left, right, alg):
    """Normalized similarity plus normalized distance is (approximately) 1."""
    norm_distance = alg.normalized_distance(left, right)
    norm_similarity = alg.normalized_similarity(left, right)
    # Compared with isclose rather than == to tolerate float rounding.
    total = norm_similarity + norm_distance
    assert isclose(total, 1)
@pytest.mark.parametrize('alg', ALGS)
@hypothesis.given(text=hypothesis.strategies.text())
def test_normalization_same(text, alg):
    """Comparing a text with itself gives distance 0 and similarity 1."""
    same = (text, text)
    assert alg.normalized_distance(*same) == 0

    # The raw (non-normalized) distance check is not applied to
    # needleman_wunsch.
    # NOTE(review): presumably its raw distance for identical inputs is not
    # zero -- confirm against the algorithm's implementation.
    check_raw_distance = alg is not textdistance.needleman_wunsch
    if check_raw_distance:
        assert alg.distance(*same) == 0

    assert alg.normalized_similarity(*same) == 1
@pytest.mark.parametrize('alg', ALGS)
@hypothesis.settings(deadline=None)
@hypothesis.given(
    left=hypothesis.strategies.text(min_size=1),
    right=hypothesis.strategies.text(min_size=1),
)
def test_normalization_monotonic(left, right, alg):
    """Normalization keeps the ordering between distance and similarity.

    "distance < similarity" must hold for the normalized values exactly
    when it holds for the raw values.
    """
    norm_distance = alg.normalized_distance(left, right)
    norm_similarity = alg.normalized_similarity(left, right)
    raw_distance = alg.distance(left, right)
    raw_similarity = alg.similarity(left, right)

    normalized_says_closer = norm_distance < norm_similarity
    raw_says_closer = raw_distance < raw_similarity
    assert normalized_says_closer == raw_says_closer
@pytest.mark.parametrize('alg', ALGS)
def test_no_common_chars(alg):
    """Strings that share no characters have similarity 0.

    The assertion is not applied to editex; for that algorithm the test
    passes without checking anything.
    """
    if alg is not textdistance.editex:
        assert alg.similarity('spam', 'qwer') == 0
@pytest.mark.parametrize('alg', ALGS)
def test_empty(alg):
    """Two empty strings are at distance 0 from each other."""
    empty = ''
    result = alg.distance(empty, empty)
    assert result == 0
@pytest.mark.parametrize('alg', ALGS)
def test_unequal_distance(alg):
    """An empty string and a non-empty string are at a positive distance.

    Only checked when ``alg.maximum`` for the pair is truthy; otherwise the
    test passes without asserting.
    """
    left, right = '', 'qwertyui'
    if not alg.maximum(left, right):
        return
    assert alg.distance(left, right) > 0