-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathdispatcher.py
151 lines (121 loc) · 4.71 KB
/
dispatcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# -*- coding: utf-8 -*-
__author__ = 'huzhicheng'
import Queue
import time
import urllib2
import urllib
import cookielib
import settings
import threading
import logging
import os
import re
from bs4 import BeautifulSoup
import spiderWorker
import codecs
import sys
# Keep the log file next to this module.  The module path is made absolute
# first: when the script is started as ``python dispatcher.py`` the directory
# part of ``__file__`` is empty, and joining that with "/" would point at
# "/logon.log" in the filesystem root.
LOG_FILENAME = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logon.log")

# The root logger is configured so that every module logging through
# ``logging`` writes to the same file, at DEBUG level and above.
logger = logging.getLogger()
handler = logging.FileHandler(LOG_FILENAME)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
class Helper(threading.Thread):
    """Thread that logs in to Baidu and hands song links to spider workers.

    ``run`` performs the login, starts the workers, scrapes the page at
    ``settings.musiclistUrl`` and puts every song link it finds on a queue
    shared with the workers.
    """

    def __init__(self):
        super(Helper, self).__init__()

    def run(self):
        """Log in, then queue every song link found on the music list page.

        Returns False when the login fails, otherwise None.  (The value is
        only visible to callers that invoke ``run`` directly.)
        """
        if not self.login():
            print("登录失败")
            return False

        queue = Queue.Queue()
        pool = self.buildThreads(queue)
        try:
            content = self._fetch(settings.musiclistUrl)
            soup = BeautifulSoup(content)
            for tag in soup.find_all("div", class_="song-item"):
                href = self._song_href(tag)
                if href is not None and href.startswith("/song"):
                    queue.put(href)
        finally:
            # The workers are already running at this point, so always send
            # one sentinel per worker and wait for them, even when fetching
            # or parsing the list page failed.
            # NOTE(review): assumes spiderWorker.worker stops when it reads
            # u"quit" from the queue -- confirm against spiderWorker.
            for worker in pool:
                queue.put(u"quit")
            for worker in pool:
                worker.join()

    def buildThreads(self, queue, count=9):
        """Start *count* spider workers reading from *queue* and return them.

        Workers are named "T0", "T1", ... in start order.  A *count* of zero
        or less starts nothing and returns an empty list.
        """
        workers = []
        for i in range(count):
            worker = spiderWorker.worker(queue, "T" + str(i))
            worker.start()
            workers.append(worker)
        return workers

    def login(self):
        """Log in with the credentials from ``settings``.

        Steps: install a cookie-aware opener, load the home page to collect
        cookies, fetch a login token, POST the credentials, then check the
        profile page for the logged-in marker.

        Returns True when the marker is found, False when no token could be
        extracted or the marker is missing.
        """
        cookie_jar = cookielib.CookieJar()
        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(cookiejar=cookie_jar))
        # Installed globally so that every later urllib2.urlopen call in the
        # process sends the session cookies.
        urllib2.install_opener(opener)

        print("Get Cookie")
        logger.info("Get Cookie")
        # The body is not needed; the request is made for its cookies only.
        self._fetch(urllib2.Request("http://www.baidu.com"))
        for index, cookie in enumerate(cookie_jar):
            print("[%s]:%s\r\n" % (index, cookie))
            logger.info("{0}:{1}".format(index, cookie))

        print("Get token")
        logger.info("Get token")
        tokenUrl = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true"
        tokenHtml = self._fetch(tokenUrl)
        reg = re.compile(r"bdPass.api.params.login_token='(?P<tokenVal>\w+)';")
        token = reg.findall(tokenHtml)
        if not token:
            # Without a token the login POST cannot be built; report the
            # failure instead of hitting an undefined variable further down.
            print("Get token fail")
            logger.error("Get token fail")
            return False
        tokenVal = token[0]
        print(tokenVal)
        logger.info(tokenVal)
        # Message text kept as in the original; it is emitted once the token
        # has been obtained, before the credentials are posted.
        print("登录成功")
        logger.info("登录成功")

        baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login"
        print(settings.username)
        userInfo = {"userName": settings.username, "passWord": settings.password}
        postData = self.BuildPostData(tokenVal, userInfo)
        userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.76 Safari/537.36"
        baiduRequest = urllib2.Request(
            baiduMainLoginUrl, postData, headers={"User-Agent": userAgent})
        baiduPage = self._fetch(baiduRequest)

        # The profile page contains this marker only for a logged-in session.
        if "header-tuc-uname" not in self._fetch("http://i.baidu.com/"):
            print("Login fail")
            logger.info("Login fail")
            return False
        print("Login OK Congratulation!")
        logger.info("Login OK Congratulation!")

        # The login response may embed a jump URL; follow it and report
        # where the request finally ends up.
        urlRe = re.compile(r"(?P<URL>http://www.baidu.com/cache/user/html/jump.html\S+)'")
        lastUrl = urlRe.findall(baiduPage)
        if lastUrl:
            trueLoginUrl = lastUrl[0]
            print(trueLoginUrl)
            openLastRequest = urllib2.urlopen(trueLoginUrl)
            try:
                finalUrl = openLastRequest.geturl()
            finally:
                openLastRequest.close()
            print(finalUrl)
            logger.info("真正登录地址:{0}".format(finalUrl))
        return True

    def BuildPostData(self, token, userInfo):
        """Return the url-encoded form body for the login POST.

        token    -- login token extracted from the passport API response.
        userInfo -- dict with the keys "userName" and "passWord".
        """
        jumppage = "http://www.baidu.com/cache/user/html/jump.html"
        postDict = {
            'charset': "utf-8",
            'token': token,
            'isPhone': "false",
            'index': "0",
            'staticpage': jumppage,  # percent-encoded by urlencode below
            'loginType': "1",
            'tpl': "mn",
            'callback': "parent.bdPass.api.login._postCallback",
            'username': userInfo["userName"],
            'password': userInfo["passWord"],
            'mem_pass': "on",
        }
        return urllib.urlencode(postDict)

    def _fetch(self, request):
        """Open *request* (URL string or urllib2.Request) and return its body.

        The response is closed even when reading fails.
        """
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    def _song_href(self, tag):
        """Return the href of the title link inside *tag*, or None.

        None is returned when the "song-title" span, its link, or the link's
        href attribute is missing, so one malformed item does not abort the
        whole scrape.
        """
        span = tag.find("span", class_="song-title")
        if span is None or span.a is None:
            return None
        return span.a.get("href")