|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "# Implementation of the article *Trace Clustering based on Conserved Patterns: Towards Achieving Better Process Models*" |
| 8 | + ] |
| 9 | + }, |
| 10 | + { |
| 11 | + "cell_type": "markdown", |
| 12 | + "metadata": {}, |
| 13 | + "source": [ |
| 14 | + "# Maximal Pair" |
| 15 | + ] |
| 16 | + }, |
| 17 | + { |
| 18 | + "cell_type": "code", |
| 19 | + "execution_count": 77, |
| 20 | + "metadata": {}, |
| 21 | + "outputs": [ |
| 22 | + { |
| 23 | + "name": "stdout", |
| 24 | + "output_type": "stream", |
| 25 | + "text": [ |
| 26 | + "MPS [{'a': 3, 'b': 3, 'bc': 2, 'bcd': 2, 'c': 2, 'cd': 2, 'd': 2}, {'d': 2, 'da': 2, 'dab': 2, 'dabc': 2, 'a': 2, 'ab': 2, 'abc': 2, 'b': 4, 'bc': 2, 'c': 2}, {'b': 6, 'bb': 4, 'bbb': 2, 'bbbc': 2, 'bbc': 2, 'bc': 2, 'c': 3, 'a': 2}, {'a': 4, 'aa': 2, 'b': 2, 'c': 3, 'cc': 2}, {'a': 4, 'aa': 2, 'c': 6, 'cd': 2, 'cdc': 2, 'd': 6, 'dc': 3, 'b': 5, 'cb': 2, 'db': 2, 'e': 2, 'bd': 2}]\n" |
| 27 | + ] |
| 28 | + } |
| 29 | + ], |
| 30 | + "source": [ |
| 31 | + "# Dictionary of maximal pairs for each trace (repeated substring -> occurrence count)\n", |
| 32 | + "MPS = []\n", |
| 33 | + "Voisinages = []\n", |
| 34 | + "Log = [\"faabcdbbcda\", \"dabcdabcbb\", \"bbbcdbbbccaa\", \"aaadabbccc\", \"aaacdcdcbedbccbadbdebdcg\"]\n", |
| 35 | + "\n", |
| 36 | + "for Trace in Log:\n", |
| 37 | + " MP = dict()\n", |
| 38 | + " Seqs = []\n", |
| 39 | + " k = 0\n", |
| 40 | + " Voisinage = []\n", |
| 41 | + " for i in range(len(Trace)):\n", |
| 42 | + " for j in range(len(Trace)):\n", |
| 43 | + " seq = Trace[i:j+1]\n", |
| 44 | + " Seqs.append(seq)\n", |
| 45 | + " if(j+1>=len(Trace)):\n", |
| 46 | + " vd = \"\"\n", |
| 47 | + " else:\n", |
| 48 | + " vd = Trace[j+1]\n", |
| 49 | + " if(i-1<0):\n", |
| 50 | + " vg = \"\"\n", |
| 51 | + " else: \n", |
| 52 | + " vg = Trace[i-1]\n", |
| 53 | + " if(seq!=\"\"):\n", |
| 54 | + " Voisinage.append([seq])\n", |
| 55 | + " Voisinage[k].append([vg , vd])\n", |
| 56 | + " k+= 1\n", |
| 57 | + " if(Seqs.count(seq)>=2 and seq!=\"\"):\n", |
| 58 | + " MP[seq] = int(Seqs.count(seq))\n", |
| 59 | + " MPS.append(MP)\n", |
| 60 | + " Voisinages.append(Voisinage)\n", |
| 61 | + "\n", |
| 62 | + "print(\"MPS\", MPS)" |
| 63 | + ] |
| 64 | + }, |
| 65 | + { |
| 66 | + "cell_type": "markdown", |
| 67 | + "metadata": {}, |
| 68 | + "source": [ |
| 69 | + "# Maximal Repeat" |
| 70 | + ] |
| 71 | + }, |
| 72 | + { |
| 73 | + "cell_type": "code", |
| 74 | + "execution_count": 78, |
| 75 | + "metadata": {}, |
| 76 | + "outputs": [ |
| 77 | + { |
| 78 | + "name": "stdout", |
| 79 | + "output_type": "stream", |
| 80 | + "text": [ |
| 81 | + "MRS [{'a', 'b', 'bcd'}, {'dabc', 'b'}, {'bb', 'bbbc', 'a', 'b', 'c'}, {'a', 'b', 'c', 'aa', 'cc'}, {'bd', 'cdc', 'e', 'a', 'd', 'b', 'db', 'c', 'aa', 'dc', 'cb'}]\n" |
| 82 | + ] |
| 83 | + } |
| 84 | + ], |
| 85 | + "source": [ |
| 86 | + "MRS = []\n", |
| 87 | + "l = 0 \n", |
| 88 | + "for MP in MPS:\n", |
| 89 | + " MR = {k for k, v in MP.items() if v == MP[max(MP, key=MP.get)]}\n", |
| 90 | + " for i in range(len(Voisinages[l])):\n", |
| 91 | + " cd = True\n", |
| 92 | + " for j in range(len(Voisinages[l])):\n", |
| 93 | + " if(Voisinages[l][i][0]==Voisinages[l][j][0] and i!=j and Voisinages[l][i][0] not in MR):\n", |
| 94 | + " if(Voisinages[l][i][1][0] == Voisinages[l][j][1][0] or Voisinages[l][i][1][1] == Voisinages[l][j][1][1]):\n", |
| 95 | + " break\n", |
| 96 | + " MR.add(Voisinages[l][i][0])\n", |
| 97 | + " MRS.append(MR)\n", |
| 98 | + " l+=1\n", |
| 99 | + "\n", |
| 100 | + "print(\"MRS\",MRS)" |
| 101 | + ] |
| 102 | + }, |
| 103 | + { |
| 104 | + "cell_type": "markdown", |
| 105 | + "metadata": {}, |
| 106 | + "source": [ |
| 107 | + "# Super Maximal Repeat" |
| 108 | + ] |
| 109 | + }, |
| 110 | + { |
| 111 | + "cell_type": "code", |
| 112 | + "execution_count": 79, |
| 113 | + "metadata": {}, |
| 114 | + "outputs": [ |
| 115 | + { |
| 116 | + "name": "stdout", |
| 117 | + "output_type": "stream", |
| 118 | + "text": [ |
| 119 | + "SMRS [{'a', 'bcd'}, {'dabc'}, {'bbbc', 'a'}, {'aa', 'b', 'cc'}, {'bd', 'cdc', 'e', 'db', 'aa', 'cb'}]\n" |
| 120 | + ] |
| 121 | + } |
| 122 | + ], |
| 123 | + "source": [ |
| 124 | + "SMRS = []\n", |
| 125 | + "for e in MRS:\n", |
| 126 | + " SMR = e.copy()\n", |
| 127 | + " for i in e:\n", |
| 128 | + " for j in e:\n", |
| 129 | + " if(i!=j):\n", |
| 130 | + " if(i in j and i in SMR):\n", |
| 131 | + " SMR.remove(i)\n", |
| 132 | + " SMRS.append(SMR)\n", |
| 133 | + " \n", |
| 134 | + "print(\"SMRS\",SMRS)" |
| 135 | + ] |
| 136 | + }, |
| 137 | + { |
| 138 | + "cell_type": "markdown", |
| 139 | + "metadata": {}, |
| 140 | + "source": [ |
| 141 | + "# Near Super Maximal Repeat" |
| 142 | + ] |
| 143 | + }, |
| 144 | + { |
| 145 | + "cell_type": "code", |
| 146 | + "execution_count": 81, |
| 147 | + "metadata": {}, |
| 148 | + "outputs": [ |
| 149 | + { |
| 150 | + "name": "stdout", |
| 151 | + "output_type": "stream", |
| 152 | + "text": [ |
| 153 | + "NSMR [{'a', 'b', 'bcd'}, {'dabc', 'b'}, {'bbbc', 'a', 'c'}, {'a', 'b', 'c', 'aa', 'cc'}, {'bd', 'a', 'cdc', 'e', 'd', 'db', 'c', 'aa', 'dc', 'cb'}]\n" |
| 154 | + ] |
| 155 | + } |
| 156 | + ], |
| 157 | + "source": [ |
| 158 | + "NSMR = []\n", |
| 159 | + "l = 0 \n", |
| 160 | + "for Trace in Log:\n", |
| 161 | + " nt = Trace\n", |
| 162 | + " NSM = SMRS[l].copy()\n", |
| 163 | + " for i in SMRS[l]:\n", |
| 164 | + " nt = nt.replace(i,\"\")\n", |
| 165 | + " for i in MRS[l]:\n", |
| 166 | + " if(i in nt):\n", |
| 167 | + " NSM.add(i)\n", |
| 168 | + " NSMR.append(NSM)\n", |
| 169 | + " l+=1\n", |
| 170 | + "\n", |
| 171 | + "print(\"NSMR\",NSMR)" |
| 172 | + ] |
| 173 | + }, |
| 174 | + { |
| 175 | + "cell_type": "code", |
| 176 | + "execution_count": 190, |
| 177 | + "metadata": {}, |
| 178 | + "outputs": [ |
| 179 | + { |
| 180 | + "name": "stdout", |
| 181 | + "output_type": "stream", |
| 182 | + "text": [ |
| 183 | + "---------------------------------------------------------------------------------------------------------------------------\n", |
| 184 | + "|ID| Trace | Maximal Repeat Set | Super Maximal Repeat Set | Near Super Maximal Repeat Set |\n", |
| 185 | + "|--+-----------------------+-----------------------------+---------------------------+-------------------------------------|\n", |
| 186 | + "|T1| faabcdbbcda | {'a', 'b', 'bcd'} | {'a', 'bcd'} | {'a', 'b', 'bcd'} |\n", |
| 187 | + "|T2| dabcdabcbb | {'dabc', 'b'} | {'dabc'} | {'dabc', 'b'} |\n", |
| 188 | + "|T3| bbbcdbbbccaa |{'bb', 'bbbc', 'a', 'b', 'c'}| {'bbbc', 'a'} | {'bbbc', 'a', 'c'} |\n", |
| 189 | + "|T4| aaadabbccc | {'a', 'b', 'c', 'aa', 'cc'} | {'aa', 'b', 'cc'} | {'a', 'b', 'c', 'aa', 'cc'} |\n", |
| 190 | + "---------------------------------------------------------------------------------------------------------------------------\n" |
| 191 | + ] |
| 192 | + } |
| 193 | + ], |
| 194 | + "source": [ |
| 195 | + "print(\"---------------------------------------------------------------------------------------------------------------------------\")\n", |
| 196 | + "print(\"|ID| Trace | Maximal Repeat Set | Super Maximal Repeat Set | Near Super Maximal Repeat Set |\")\n", |
| 197 | + "print(\"|--+-----------------------+-----------------------------+---------------------------+-------------------------------------|\")\n", |
| 198 | + "print(\"|\", end=\"\")\n", |
| 199 | + "print(\"T1| \"+str(Log[0])+\" | \"+str(MRS[0])+\" | \"+str(SMRS[0])+\" | \"+str(NSMR[0])+\" |\")\n", |
| 200 | + "print(\"|\", end=\"\")\n", |
| 201 | + "print(\"T2| \"+str(Log[1])+\" | \"+str(MRS[1])+\" | \"+str(SMRS[1])+\" | \"+str(NSMR[1])+\" |\")\n", |
| 202 | + "print(\"|\", end=\"\") \n", |
| 203 | + "print(\"T3| \"+str(Log[2])+\" |\"+str(MRS[2])+\"| \"+str(SMRS[2])+\" | \"+str(NSMR[2])+\" |\")\n", |
| 204 | + "print(\"|\", end=\"\")\n", |
| 205 | + "print(\"T4| \"+str(Log[3])+\" | \"+str(MRS[3])+\" | \"+str(SMRS[3])+\" | \"+str(NSMR[3])+\" |\")\n", |
| 206 | + "print(\"---------------------------------------------------------------------------------------------------------------------------\")\n", |
| 207 | + "\n", |
| 208 | + "#print(\"T5|\"+str(Log[4])+\"| \"+str(MRS[4])+\"|\"+str(SMRS[4])+\" | \"+str(NSMR[4])+\"|\")\n" |
| 209 | + ] |
| 210 | + } |
| 211 | + ], |
| 212 | + "metadata": { |
| 213 | + "kernelspec": { |
| 214 | + "display_name": "Python 3", |
| 215 | + "language": "python", |
| 216 | + "name": "python3" |
| 217 | + }, |
| 218 | + "language_info": { |
| 219 | + "codemirror_mode": { |
| 220 | + "name": "ipython", |
| 221 | + "version": 3 |
| 222 | + }, |
| 223 | + "file_extension": ".py", |
| 224 | + "mimetype": "text/x-python", |
| 225 | + "name": "python", |
| 226 | + "nbconvert_exporter": "python", |
| 227 | + "pygments_lexer": "ipython3", |
| 228 | + "version": "3.7.3" |
| 229 | + } |
| 230 | + }, |
| 231 | + "nbformat": 4, |
| 232 | + "nbformat_minor": 2 |
| 233 | +} |
0 commit comments