|
| 1 | +#!/usr/bin/env python |
| 2 | +# encoding: utf-8 |
| 3 | +import requests |
| 4 | +from lxml import etree |
| 5 | +from StringIO import StringIO |
| 6 | +import re, sys, os |
| 7 | +from ConfigParser import ConfigParser |
| 8 | + |
# Python 2 only: make implicit str/unicode conversions use UTF-8 instead of
# ASCII.  ``sys.setdefaultencoding`` is hidden after interpreter start-up, so
# ``sys`` has to be reloaded to reach it.  Python 3 has neither the ``reload``
# builtin nor the need for this, hence the version guard.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')
| 11 | + |
# Textual ``\uXXXX`` escapes (a backslash followed by ``uXXXX``, six characters
# in total) mapped to the character each one stands for.  The keys are raw
# strings so that they stay six-character texts on every Python version
# instead of being turned into the characters themselves.
# NOTE(review): the quote, ampersand, backslash and tab entries assume the
# page escapes those characters the same way as the others - confirm.
char_map = {
    r'\u0009': '\t',
    r'\u000A': '\n',
    r'\u000D': '\r',
    r'\u0022': '"',
    r'\u0026': '&',
    r'\u0027': "'",
    r'\u002C': ',',
    r'\u002D': '-',
    r'\u003B': ';',
    r'\u003C': '<',
    r'\u003D': '=',
    r'\u003E': '>',
    r'\u005C': '\\',
}

# Scheme and host that relative links scraped from the site are appended to.
BASE_URL = "https://leetcode.com"
| 15 | + |
def getConfig():
    """Read the login credentials and the run mode from ``config.ini``.

    The file is looked up in the current working directory.  It must provide
    ``user`` and ``password`` in an ``[info]`` section and ``mode`` (either
    ``hard`` or ``simple``) in a ``[mode]`` section.

    Returns:
        A ``(user, password, mode)`` tuple of strings.

    Raises:
        SystemExit: with an explanatory message and a non-zero exit status
            when the file cannot be read, a credential is empty, or the mode
            is not one of the two accepted values.
        ConfigParser.Error: propagated from the parser when a required
            section or option is missing.
    """
    config = ConfigParser()
    # ``read`` silently skips unreadable files and returns the ones it parsed,
    # so an empty result means there is no usable configuration at all.
    if not config.read('config.ini'):
        sys.exit("Cannot read config.ini in the current directory")

    usr = config.get('info', 'user')
    passwd = config.get('info', 'password')
    mode = config.get('mode', 'mode')

    # ``sys.exit`` with a message reports on stderr and exits with status 1,
    # so a misconfiguration is not mistaken for a successful run.
    if not usr or not passwd:
        sys.exit("Please specify your login info in the file config.ini")
    if mode not in ('hard', 'simple'):
        sys.exit("Please choose your mode from hard and simple")
    return usr, passwd, mode
| 29 | + |
def login():
    """Log in to LeetCode with the credentials from ``getConfig``.

    Fetches the login page to obtain the ``csrftoken`` cookie, posts the
    credentials together with that token, and stores the body of the reply
    in ``index.html`` in the current directory.

    Returns:
        A ``(session, html)`` tuple: the ``requests`` session used for the
        login and the text of the reply to the login POST.

    Raises:
        KeyError: when the login page does not set a ``csrftoken`` cookie.
    """
    usr, passwd, _mode = getConfig()  # the mode is not needed for logging in
    login_url = BASE_URL + "/accounts/login/"

    session = requests.session()
    resp = session.get(login_url)
    csrftoken = dict(resp.cookies)['csrftoken']

    payload = {
        'login': usr,
        'password': passwd,
        'csrfmiddlewaretoken': csrftoken,
    }
    headers = {
        # Conventional name of the header AJAX clients send.
        'X-Requested-With': 'XMLHttpRequest',
        'Connection': 'Keep-Alive',
        'Accept': 'text/html, application/xhtml+xml, */*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0;rv:11.0) like Gecko',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': login_url,
    }

    reply = session.post(login_url, data=payload, headers=headers)

    # Keep a copy of the page that came back so it can be inspected later.
    with open("index.html", "wb") as fh:
        fh.write(reply.text.encode("utf-8"))

    return session, reply.text
| 57 | + |
def extractQuestion(html):
    """Parse the problem list page into ``Problem`` records.

    A table row is treated as a problem when it has exactly five children,
    the first of which is a ``<td>``.  The status is the ``class`` attribute
    of the first child of cell 0, the link and title come from the first
    child of cell 2, and cells 1, 3 and 4 supply the id, acceptance rate and
    level as text.

    Args:
        html: text of the page to parse.

    Returns:
        An ``(allProblems, solvedProblems)`` tuple of lists.  The second list
        holds the problems whose status is anything other than the string
        ``"None"``; both lists share the same ``Problem`` objects.
    """
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html.encode('utf-8')), parser)

    allProblems = []
    solvedProblems = []
    for row in tree.findall(".//tr"):
        if len(row) != 5 or row[0].tag != 'td':
            continue
        # A row without a status marker or a title link is not a problem
        # row; skip it instead of failing on the child lookups below.
        if len(row[0]) == 0 or len(row[2]) == 0:
            continue

        status = row[0][0].get('class')
        title_link = row[2][0]
        problem = Problem(status, row[1].text, title_link.get('href'),
                          title_link.text, row[3].text, row[4].text)

        allProblems.append(problem)
        # NOTE(review): presumably the page marks untouched problems with
        # class="None" - confirm against the live markup.
        if status != "None":
            solvedProblems.append(problem)
    return allProblems, solvedProblems
| 82 | + |
def getSubmissionResults(problem, session):
    """Fetch a problem's submissions page and pull out what the report needs.

    Args:
        problem: object whose ``href`` is the problem's site-relative link.
        session: ``requests`` session used to fetch the page.

    Returns:
        A ``(codeLinks, table_nodes)`` tuple: the ``href`` found in the third
        cell of every five-cell ``<td>`` row, and all ``<table>`` elements of
        the page.
    """
    page = session.get(BASE_URL + problem.href + "submissions")
    rows = findAllElements(page, 'tr')
    tables = findAllElements(page, 'table')
    hrefs = [
        row[2][0].get('href')
        for row in rows
        if len(row) == 5 and row[0].tag == 'td'
    ]
    return hrefs, tables
| 97 | + |
def findAllElements(response, ele_str):
    """Return every element with tag ``ele_str`` in an HTTP response body.

    Args:
        response: object whose ``text`` attribute holds the HTML to parse.
        ele_str: tag name to look for, e.g. ``'tr'``.

    Returns:
        The list of matching elements from the parsed document.
    """
    document = etree.parse(StringIO(response.text.encode('utf-8')),
                           etree.HTMLParser())
    return document.findall(".//" + ele_str)
| 104 | + |
def getResultTable(table_nodes):
    """Serialize the first table of ``table_nodes``, or return ``""`` if empty.

    Args:
        table_nodes: sequence of parsed ``<table>`` elements.

    Returns:
        The pretty-printed markup of the first element, or an empty string
        when the sequence has no elements.
    """
    has_table = len(table_nodes) > 0
    return etree.tostring(table_nodes[0], pretty_print=True) if has_table else ""
| 109 | + |
def getHTMLHead():
    """Fetch the LeetCode front page and return its serialized ``<head>``.

    Returns:
        The pretty-printed markup of the first ``<head>`` element of the page.
    """
    front_page = requests.get('https://leetcode.com')
    first_head = findAllElements(front_page, 'head')[0]
    return etree.tostring(first_head, pretty_print=True)
| 115 | + |
| 116 | + |
| 117 | + |
| 118 | + |
def createReport():
    """Start a fresh ``report.html`` in the current directory.

    Writes the opening ``<html>`` tag, a ``<head>`` with three stylesheet
    links, and the opening ``<body>`` and ``<table>`` tags.  The table is
    deliberately left open so that later writes can append to the file.
    Any existing ``report.html`` is overwritten.
    """
    # ``with`` closes the file even if one of the writes fails.
    with open('report.html', 'w') as fh:
        fh.write("<html>")
        fh.write("""<head>
        <link href="http://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css" rel="stylesheet"/>
        <link href="http://maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/>
        <link href="https://leetcode.com/static/animate/animate.css" rel="stylesheet"/>
        </head>
        """)
        fh.write(""" <body>
        <table class="table table-hover table-bordered table-striped" id="result_testcases" ng-app="statusPollerApp">
        """)
def addTable2Report(problem, file, tableHTML):
    """Append one problem's heading and result table to the report file.

    Args:
        problem: object providing ``id`` and ``title``.
        file: path of the report to append to.
        tableHTML: markup written directly after the heading.
    """
    heading = "<thead><tr>" + str(problem.id) + " " + problem.title + "</tr></thead>"
    # NOTE(review): the heading text sits directly inside <tr> without a
    # <th>/<td> cell - confirm that the rendered report looks as intended.
    # ``with`` closes the file even if one of the writes fails.
    with open(file, 'a') as fh:
        fh.write(heading)
        fh.write(tableHTML)
| 137 | + |
| 138 | + |
| 139 | + |
def getCode(session, link):
    """Download a submission page and extract the submitted source from it.

    The text of every ``<script>`` element of the page is searched with the
    regular expression returned by ``pattern()``; the first match wins.

    Args:
        session: ``requests`` session used to fetch the page.
        link: absolute URL of the submission page.

    Returns:
        A ``(lang, code)`` tuple taken from the two groups of the match, or
        ``(None, None)`` when no script matches.  The result is always a
        2-tuple so that callers can unpack it unconditionally.
    """
    resp = session.get(link)
    prog = pattern()
    for script in findAllElements(resp, 'script'):
        if not script.text:
            continue  # external or empty script, nothing to search
        found = prog.search(script.text)
        if found:
            return found.group(1), found.group(2)
    return None, None
| 152 | + |
def formatCode(code, char_map):
    """Decode the textual ``\\uXXXX`` escapes contained in ``code``.

    The escaped CR LF pair is first collapsed into a single newline, then
    every key of ``char_map`` found in the text is replaced by its value.
    The replacements happen in one pass over the text, so a character
    produced by one replacement is never decoded a second time.

    Args:
        code: text in which special characters appear as six-character
            escapes (a backslash followed by ``uXXXX``).
        char_map: mapping from escape text to replacement text.

    Returns:
        The decoded text.
    """
    # Raw string: the escape is matched as text, not as the CR LF characters.
    code = code.replace(r'\u000D\u000A', '\n')
    if not char_map:
        return code
    # Longest keys first, so a key that is a prefix of another cannot shadow it.
    keys = sorted(char_map, key=len, reverse=True)
    escapes = re.compile('|'.join(re.escape(key) for key in keys))
    return escapes.sub(lambda match: char_map[match.group(0)], code)
| 159 | + |
def writeCode2file(lang, code, id, title, no, location):
    """Save one submission as ``<id>_<title>_<no><ext>`` inside ``location``.

    Spaces in the title are replaced by underscores and the extension is
    chosen from the language.

    Args:
        lang: ``'python'``, ``'java'`` or ``'cpp'``.
        code: source text to write.
        id: problem id used as the first part of the file name.
        title: problem title used as the second part of the file name.
        no: number of the submission, used as the third part.
        location: existing directory the file is created in.

    Raises:
        KeyError: when ``lang`` is not one of the supported languages.
    """
    format_map = {'python': '.py', 'java': '.java', 'cpp': '.cpp'}
    ext = format_map[lang]
    file_name = str(id) + "_" + title.replace(" ", "_") + "_" + str(no) + ext
    # ``with`` closes the file even if the write fails.
    with open(os.path.join(location, file_name), 'w') as fh:
        fh.write(code)
| 168 | + |
def makeDir(dirName):
    """Create ``dirName`` below the current working directory if needed.

    Missing parent directories are created as well.  An already existing
    directory is left untouched.

    Args:
        dirName: path of the directory, relative to the working directory.

    Raises:
        OSError: when the directory cannot be created and does not exist
            afterwards (for example because a file of that name is in the way).
    """
    target = os.path.join(os.getcwd(), dirName)
    try:
        os.makedirs(target)
    except OSError:
        # Creating first and checking afterwards avoids the gap between an
        # existence test and the creation; an existing directory is fine.
        if not os.path.isdir(target):
            raise
| 172 | + |
# Compiled once at import time.  Group 1 is the language and group 2 the text
# between the quotes of a ``storage.put('<lang>', '<code>');`` call that is
# followed, anywhere later in the searched text, by another ``storage.put``.
_SUBMISSION_CODE_RE = re.compile(
    r"storage\.put\('(python|java|cpp)', '(.+)'\);.+storage\.put",
    re.DOTALL,
)


def pattern():
    """Return the compiled regular expression that locates submitted code.

    Returns:
        A compiled pattern with two groups: the language (``python``,
        ``java`` or ``cpp``) and the quoted code text.  ``re.DOTALL`` is set,
        so the code may span several lines.
    """
    return _SUBMISSION_CODE_RE
| 176 | + |
| 177 | + |
class Problem(object):
    """One row of the problem list, stored exactly as it was scraped.

    Attributes are plain values passed to the constructor: ``status``,
    ``id``, ``href``, ``title``, ``ac_rate`` and ``level``.
    """

    def __init__(self, status, id, href, title, ac_rate, level):
        self.status = status
        self.id = id
        self.href = href
        self.title = title
        self.ac_rate = ac_rate
        self.level = level

    def __repr__(self):
        # Short form for debugging output; id and title identify the problem.
        return "Problem(id=%r, title=%r, status=%r)" % (
            self.id, self.title, self.status)
| 186 | + |
| 187 | + |
def main():
    """Download every submission of the solved problems and build the report.

    Logs in, starts ``report.html``, makes sure the ``solutions`` directory
    exists, and then, for each solved problem, appends its result table to
    the report and saves the code of each submission link under
    ``solutions``.  The report's table, body and document are closed at the
    end.
    """
    session, html = login()
    createReport()
    makeDir('solutions')
    _allProblems, solvedProblems = extractQuestion(html)

    for problem in solvedProblems:
        codeLinks, table_nodes = getSubmissionResults(problem, session)
        addTable2Report(problem, 'report.html', getResultTable(table_nodes))

        # Submissions are numbered from 1 in the order their links appear.
        for count, link in enumerate(codeLinks, 1):
            result = getCode(session, BASE_URL + link)
            # Skip pages from which no (lang, code) pair could be extracted
            # instead of failing on the unpacking below.
            if not result or not result[1]:
                continue
            lang, code = result
            code = formatCode(code, char_map)
            writeCode2file(lang, code, problem.id, problem.title, count, 'solutions')

        print("Done with problem " + problem.title)
        print("")

    # ``with`` closes the file even if the write fails.
    with open('report.html', 'a') as fh:
        fh.write('</table></body></html>')


if __name__ == "__main__":
    main()
0 commit comments