-
Notifications
You must be signed in to change notification settings - Fork 254
/
Copy pathtextdistance_fuzzer.py
116 lines (90 loc) · 3.71 KB
/
textdistance_fuzzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import itertools
import sys
from collections import defaultdict
from typing import List, Dict, Type
import atheris
from fuzz_helpers import EnhancedFuzzedDataProvider
from dataclasses import dataclass, field
with atheris.instrument_imports():
import textdistance
@dataclass
class InitializationConstraints:
    """
    Remembers which optional constructor keywords (qval / external) are still
    assumed to be accepted by an algorithm class
    """
    # Both flags start out True; the harness clears one after the constructor
    # rejects the matching keyword argument.
    HAS_QVAL: bool = True
    HAS_EXTERNAL: bool = True
@dataclass
class FuzzTarget:
    """
    One fuzz candidate: an algorithm class paired with the name of the method
    to invoke on an instance of it
    """
    # Class that the harness instantiates for this target.
    algo_cls: type
    # Attribute name looked up on the instance and called with two strings.
    fuzz_func_name: str
# Algorithm classes exercised by the fuzzer. The order matters: it determines
# the order of FUZZ_TARGETS and therefore which target a given input selects.
ALGORITHMS = [
    textdistance.Hamming,
    textdistance.Bag,
    textdistance.Gotoh,
    textdistance.MLIPNS,
    textdistance.Levenshtein,
    textdistance.DamerauLevenshtein,
    textdistance.Jaro,
    textdistance.JaroWinkler,
    textdistance.StrCmp95,
    textdistance.NeedlemanWunsch,
    textdistance.SmithWaterman,
    textdistance.Jaccard,
    textdistance.Sorensen,
    textdistance.Tversky,
    textdistance.Overlap,
    textdistance.Cosine,
    textdistance.Tanimoto,
    textdistance.MongeElkan,
    textdistance.LCSSeq,
    textdistance.LCSStr,
    textdistance.RatcliffObershelp,
    textdistance.ArithNCD,
    textdistance.RLENCD,
    textdistance.BWTRLENCD,
    textdistance.SqrtNCD,
    textdistance.BZ2NCD,
    textdistance.LZMANCD,
    textdistance.ZLIBNCD,
    textdistance.MRA,
    textdistance.Editex,
    textdistance.Prefix,
    textdistance.Length,
    textdistance.Identity,
    textdistance.Matrix,
]

# Method names tried on every algorithm class; only those a class actually
# exposes end up as fuzz targets.
FUZZ_METHODS = [
    "__call__",
    "distance",
    "similarity",
    "normalized_distance",
    "normalized_similarity",
]

# Filled in by initialize_fuzz_options() before fuzzing starts.
FUZZ_TARGETS: List[FuzzTarget] = []

# Per-class record of which constructor keywords are still attempted. A missing
# entry is created on first lookup with both flags set to True.
CONSTRAINT_MEMORY: Dict[Type, InitializationConstraints] = defaultdict(InitializationConstraints)
def initialize_fuzz_options():
    """
    Rebuilds FUZZ_TARGETS as every (algorithm, method) pairing for which the
    algorithm class actually has the method
    """
    global FUZZ_TARGETS
    targets = []
    # Algorithms form the outer loop so targets stay grouped per class, in
    # ALGORITHMS order, with methods in FUZZ_METHODS order.
    for algo_cls in ALGORITHMS:
        for method_name in FUZZ_METHODS:
            if hasattr(algo_cls, method_name):
                targets.append(FuzzTarget(algo_cls, method_name))
    FUZZ_TARGETS = targets
def pick_qval(fdp: EnhancedFuzzedDataProvider):
    """
    Draws the qval for the current iteration from the fuzzer data: either None
    or an integer requested via ConsumeIntInRange(1, 100)
    """
    # The boolean is consumed first and decides whether an integer is drawn at all.
    return fdp.ConsumeIntInRange(1, 100) if fdp.ConsumeBool() else None
def TestOneInput(data):
    """
    Fuzzing entry point: runs a single iteration against one textdistance target

    A (class, method) pair is picked from FUZZ_TARGETS, the class is constructed
    with whichever of the qval / external keywords it is still believed to
    accept, and the method is called with two fuzzer-generated strings.

    Returns -1 when the iteration is abandoned (constructor raised TypeError,
    the known 'split' AttributeError occurred, or an ImportError showed that a
    prerequisite is missing); otherwise returns None. Any other exception,
    including an AttributeError that does not mention 'split', propagates to
    the fuzzing engine so it is not silently hidden.
    """
    fdp = EnhancedFuzzedDataProvider(data)

    # Pick a target
    fuzz_target: FuzzTarget = fdp.PickValueInList(FUZZ_TARGETS)
    constraints = CONSTRAINT_MEMORY[fuzz_target.algo_cls]

    try:
        # Only pass the keywords this class has not rejected before. qval is
        # inserted first so the keyword order matches the explicit call form.
        init_kwargs = {}
        if constraints.HAS_QVAL:
            init_kwargs["qval"] = pick_qval(fdp)
        if constraints.HAS_EXTERNAL:
            init_kwargs["external"] = False
        algo = fuzz_target.algo_cls(**init_kwargs)
    except TypeError as e:
        # Update our memory on if a given parameter is invalid. Only one flag
        # is cleared per failure; a later iteration learns about the other.
        message = str(e)
        if 'qval' in message:
            constraints.HAS_QVAL = False
        elif 'external' in message:
            constraints.HAS_EXTERNAL = False
        return -1

    try:
        getattr(algo, fuzz_target.fuzz_func_name)(fdp.ConsumeRandomString(), fdp.ConsumeRandomString())
    except AttributeError as e:
        # The 'split' failure pops too often, just catch and ignore it
        if 'split' in str(e):
            return -1
        # Anything else is unexpected: re-raise so the fuzzer can report it
        # instead of treating the input as a clean run.
        raise
    except ImportError:
        # Drop this target from the list, since we don't have pre-reqs to use it
        FUZZ_TARGETS.remove(fuzz_target)
        return -1
def main():
    """
    Builds the target list, registers TestOneInput with atheris and starts fuzzing
    """
    # Targets must exist before the first TestOneInput call picks from them.
    initialize_fuzz_options()
    argv = sys.argv
    atheris.Setup(argv, TestOneInput)
    atheris.Fuzz()


if __name__ == "__main__":
    main()