Skip to content

Commit c77b3fc

Browse files
Add files via upload
1 parent 2e1db56 commit c77b3fc

File tree

1 file changed

+233
-0
lines changed

1 file changed

+233
-0
lines changed

Trace-Clustering-with-Python.ipynb

+233
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Implementation result of article : Trace Clustering based on Conserved Patterns: Towards Achieving Better Process Models"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"# Maximal Pair"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 77,
20+
"metadata": {},
21+
"outputs": [
22+
{
23+
"name": "stdout",
24+
"output_type": "stream",
25+
"text": [
26+
"MPS [{'a': 3, 'b': 3, 'bc': 2, 'bcd': 2, 'c': 2, 'cd': 2, 'd': 2}, {'d': 2, 'da': 2, 'dab': 2, 'dabc': 2, 'a': 2, 'ab': 2, 'abc': 2, 'b': 4, 'bc': 2, 'c': 2}, {'b': 6, 'bb': 4, 'bbb': 2, 'bbbc': 2, 'bbc': 2, 'bc': 2, 'c': 3, 'a': 2}, {'a': 4, 'aa': 2, 'b': 2, 'c': 3, 'cc': 2}, {'a': 4, 'aa': 2, 'c': 6, 'cd': 2, 'cdc': 2, 'd': 6, 'dc': 3, 'b': 5, 'cb': 2, 'db': 2, 'e': 2, 'bd': 2}]\n"
27+
]
28+
}
29+
],
30+
"source": [
31+
"#dictionnary of Maximal Pair\n",
32+
"MPS = []\n",
33+
"Voisinages = []\n",
34+
"Log = [\"faabcdbbcda\", \"dabcdabcbb\", \"bbbcdbbbccaa\", \"aaadabbccc\", \"aaacdcdcbedbccbadbdebdcg\"]\n",
35+
"\n",
36+
"for Trace in Log:\n",
37+
" MP = dict()\n",
38+
" Seqs = []\n",
39+
" k = 0\n",
40+
" Voisinage = []\n",
41+
" for i in range(len(Trace)):\n",
42+
" for j in range(len(Trace)):\n",
43+
" seq = Trace[i:j+1]\n",
44+
" Seqs.append(seq)\n",
45+
" if(j+1>=len(Trace)):\n",
46+
" vd = \"\"\n",
47+
" else:\n",
48+
" vd = Trace[j+1]\n",
49+
" if(i-1<0):\n",
50+
" vg = \"\"\n",
51+
" else: \n",
52+
" vg = Trace[i-1]\n",
53+
" if(seq!=\"\"):\n",
54+
" Voisinage.append([seq])\n",
55+
" Voisinage[k].append([vg , vd])\n",
56+
" k+= 1\n",
57+
" if(Seqs.count(seq)>=2 and seq!=\"\"):\n",
58+
" MP[seq] = int(Seqs.count(seq))\n",
59+
" MPS.append(MP)\n",
60+
" Voisinages.append(Voisinage)\n",
61+
"\n",
62+
"print(\"MPS\", MPS)"
63+
]
64+
},
65+
{
66+
"cell_type": "markdown",
67+
"metadata": {},
68+
"source": [
69+
"# Maximal Repeat"
70+
]
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": 78,
75+
"metadata": {},
76+
"outputs": [
77+
{
78+
"name": "stdout",
79+
"output_type": "stream",
80+
"text": [
81+
"MRS [{'a', 'b', 'bcd'}, {'dabc', 'b'}, {'bb', 'bbbc', 'a', 'b', 'c'}, {'a', 'b', 'c', 'aa', 'cc'}, {'bd', 'cdc', 'e', 'a', 'd', 'b', 'db', 'c', 'aa', 'dc', 'cb'}]\n"
82+
]
83+
}
84+
],
85+
"source": [
86+
"MRS = []\n",
87+
"l = 0 \n",
88+
"for MP in MPS:\n",
89+
" MR = {k for k, v in MP.items() if v == MP[max(MP, key=MP.get)]}\n",
90+
" for i in range(len(Voisinages[l])):\n",
91+
" cd = True\n",
92+
" for j in range(len(Voisinages[l])):\n",
93+
" if(Voisinages[l][i][0]==Voisinages[l][j][0] and i!=j and Voisinages[l][i][0] not in MR):\n",
94+
" if(Voisinages[l][i][1][0] == Voisinages[l][j][1][0] or Voisinages[l][i][1][1] == Voisinages[l][j][1][1]):\n",
95+
" break\n",
96+
" MR.add(Voisinages[l][i][0])\n",
97+
" MRS.append(MR)\n",
98+
" l+=1\n",
99+
"\n",
100+
"print(\"MRS\",MRS)"
101+
]
102+
},
103+
{
104+
"cell_type": "markdown",
105+
"metadata": {},
106+
"source": [
107+
"# Super Maximal Repeat"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": 79,
113+
"metadata": {},
114+
"outputs": [
115+
{
116+
"name": "stdout",
117+
"output_type": "stream",
118+
"text": [
119+
"SMRS [{'a', 'bcd'}, {'dabc'}, {'bbbc', 'a'}, {'aa', 'b', 'cc'}, {'bd', 'cdc', 'e', 'db', 'aa', 'cb'}]\n"
120+
]
121+
}
122+
],
123+
"source": [
124+
"SMRS = []\n",
125+
"for e in MRS:\n",
126+
" SMR = e.copy()\n",
127+
" for i in e:\n",
128+
" for j in e:\n",
129+
" if(i!=j):\n",
130+
" if(i in j and i in SMR):\n",
131+
" SMR.remove(i)\n",
132+
" SMRS.append(SMR)\n",
133+
" \n",
134+
"print(\"SMRS\",SMRS)"
135+
]
136+
},
137+
{
138+
"cell_type": "markdown",
139+
"metadata": {},
140+
"source": [
141+
"# Near Super Maximal Repeat"
142+
]
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": 81,
147+
"metadata": {},
148+
"outputs": [
149+
{
150+
"name": "stdout",
151+
"output_type": "stream",
152+
"text": [
153+
"NSMR [{'a', 'b', 'bcd'}, {'dabc', 'b'}, {'bbbc', 'a', 'c'}, {'a', 'b', 'c', 'aa', 'cc'}, {'bd', 'a', 'cdc', 'e', 'd', 'db', 'c', 'aa', 'dc', 'cb'}]\n"
154+
]
155+
}
156+
],
157+
"source": [
158+
"NSMR = []\n",
159+
"l = 0 \n",
160+
"for Trace in Log:\n",
161+
" nt = Trace\n",
162+
" NSM = SMRS[l].copy()\n",
163+
" for i in SMRS[l]:\n",
164+
" nt = nt.replace(i,\"\")\n",
165+
" for i in MRS[l]:\n",
166+
" if(i in nt):\n",
167+
" NSM.add(i)\n",
168+
" NSMR.append(NSM)\n",
169+
" l+=1\n",
170+
"\n",
171+
"print(\"NSMR\",NSMR)"
172+
]
173+
},
174+
{
175+
"cell_type": "code",
176+
"execution_count": 190,
177+
"metadata": {},
178+
"outputs": [
179+
{
180+
"name": "stdout",
181+
"output_type": "stream",
182+
"text": [
183+
"---------------------------------------------------------------------------------------------------------------------------\n",
184+
"|ID| Trace | Maximal Repeat Set | Super Maximal Repeat Set | Near Super Maximal Repeat Set |\n",
185+
"|--+-----------------------+-----------------------------+---------------------------+-------------------------------------|\n",
186+
"|T1| faabcdbbcda | {'a', 'b', 'bcd'} | {'a', 'bcd'} | {'a', 'b', 'bcd'} |\n",
187+
"|T2| dabcdabcbb | {'dabc', 'b'} | {'dabc'} | {'dabc', 'b'} |\n",
188+
"|T3| bbbcdbbbccaa |{'bb', 'bbbc', 'a', 'b', 'c'}| {'bbbc', 'a'} | {'bbbc', 'a', 'c'} |\n",
189+
"|T4| aaadabbccc | {'a', 'b', 'c', 'aa', 'cc'} | {'aa', 'b', 'cc'} | {'a', 'b', 'c', 'aa', 'cc'} |\n",
190+
"---------------------------------------------------------------------------------------------------------------------------\n"
191+
]
192+
}
193+
],
194+
"source": [
195+
"print(\"---------------------------------------------------------------------------------------------------------------------------\")\n",
196+
"print(\"|ID| Trace | Maximal Repeat Set | Super Maximal Repeat Set | Near Super Maximal Repeat Set |\")\n",
197+
"print(\"|--+-----------------------+-----------------------------+---------------------------+-------------------------------------|\")\n",
198+
"print(\"|\", end=\"\")\n",
199+
"print(\"T1| \"+str(Log[0])+\" | \"+str(MRS[0])+\" | \"+str(SMRS[0])+\" | \"+str(NSMR[0])+\" |\")\n",
200+
"print(\"|\", end=\"\")\n",
201+
"print(\"T2| \"+str(Log[1])+\" | \"+str(MRS[1])+\" | \"+str(SMRS[1])+\" | \"+str(NSMR[1])+\" |\")\n",
202+
"print(\"|\", end=\"\") \n",
203+
"print(\"T3| \"+str(Log[2])+\" |\"+str(MRS[2])+\"| \"+str(SMRS[2])+\" | \"+str(NSMR[2])+\" |\")\n",
204+
"print(\"|\", end=\"\")\n",
205+
"print(\"T4| \"+str(Log[3])+\" | \"+str(MRS[3])+\" | \"+str(SMRS[3])+\" | \"+str(NSMR[3])+\" |\")\n",
206+
"print(\"---------------------------------------------------------------------------------------------------------------------------\")\n",
207+
"\n",
208+
"#print(\"T5|\"+str(Log[4])+\"| \"+str(MRS[4])+\"|\"+str(SMRS[4])+\" | \"+str(NSMR[4])+\"|\")\n"
209+
]
210+
}
211+
],
212+
"metadata": {
213+
"kernelspec": {
214+
"display_name": "Python 3",
215+
"language": "python",
216+
"name": "python3"
217+
},
218+
"language_info": {
219+
"codemirror_mode": {
220+
"name": "ipython",
221+
"version": 3
222+
},
223+
"file_extension": ".py",
224+
"mimetype": "text/x-python",
225+
"name": "python",
226+
"nbconvert_exporter": "python",
227+
"pygments_lexer": "ipython3",
228+
"version": "3.7.3"
229+
}
230+
},
231+
"nbformat": 4,
232+
"nbformat_minor": 2
233+
}

0 commit comments

Comments
 (0)