Commit 8aba092

Merge pull request #17 from FeeeeK/master
Format code and fix collectors
2 parents 4795357 + 7400904 commit 8aba092

51 files changed, +1123 / -1067 lines (only part of the diff is shown below)

async_requests.py (+33, -32)

@@ -1,45 +1,44 @@
-import aiohttp
 import json
-from aiosocks.connector import ProxyConnector, ProxyClientRequest
+
+import aiohttp
+from aiosocks.connector import ProxyClientRequest, ProxyConnector
 
 
 async def get(url, **kwargs):
-    return await request('get', url, **kwargs)
+    return await request("get", url, **kwargs)
 
 
 async def post(url, data, **kwargs):
     if data is dict or data is str:
-        kwargs['json'] = data
+        kwargs["json"] = data
     else:
-        kwargs['data'] = data
+        kwargs["data"] = data
 
-    return await request('post', url, **kwargs)
+    return await request("post", url, **kwargs)
 
 
 async def request(method, url, **kwargs):
     session_kwargs = {}
-    if 'proxy' in kwargs and kwargs['proxy'].startswith('socks'):
-        session_kwargs['connector'] = ProxyConnector(remote_resolve=False)
-        session_kwargs['request_class'] = ProxyClientRequest
+    if "proxy" in kwargs and kwargs["proxy"].startswith("socks"):
+        session_kwargs["connector"] = ProxyConnector(remote_resolve=False)
+        session_kwargs["request_class"] = ProxyClientRequest
 
-    if 'cookies' in kwargs:
-        session_kwargs['cookies'] = kwargs['cookies']
-        del kwargs['cookies']
+    if "cookies" in kwargs:
+        session_kwargs["cookies"] = kwargs["cookies"]
+        del kwargs["cookies"]
 
-    if 'timeout' not in kwargs:
-        kwargs['timeout'] = 10
+    if "timeout" not in kwargs:
+        kwargs["timeout"] = 10
 
     # headers={'User-Agent': get_random_user_agent()}
-    if 'headers' not in kwargs:
-        kwargs['headers'] = {
-            'User-Agent': get_random_user_agent()
-        }
-    elif 'User-Agent' not in kwargs['headers']:
-        kwargs['headers']['User-Agent'] = get_random_user_agent()
-
-    if 'override_session' in kwargs:
-        session = kwargs['override_session']
-        del kwargs['override_session']
+    if "headers" not in kwargs:
+        kwargs["headers"] = {"User-Agent": get_random_user_agent()}
+    elif "User-Agent" not in kwargs["headers"]:
+        kwargs["headers"]["User-Agent"] = get_random_user_agent()
+
+    if "override_session" in kwargs:
+        session = kwargs["override_session"]
+        del kwargs["override_session"]
         async with session.request(method, url, **kwargs) as response:
            return await Response.from_aiohttp_response(response)
 
@@ -57,22 +56,24 @@ def __init__(self, status, text, aiohttp_response=None):
     @staticmethod
     async def from_aiohttp_response(aiohttp_response):
         return Response(
-            status=aiohttp_response.status,
-            text=await aiohttp_response.text(),
-            aiohttp_response=aiohttp_response
+            status=aiohttp_response.status,
+            text=await aiohttp_response.text(),
+            aiohttp_response=aiohttp_response,
         )
 
     def __str__(self):
-        return json.dumps({
-            'status': self.status,
-            'text': self.text,
-        })
+        return json.dumps(
+            {
+                "status": self.status,
+                "text": self.text,
+            }
+        )
 
     __repr__ = __str__
 
 
 def get_random_user_agent():
-    return 'Mozilla/5.0 (Windows NT;) Gecko/20100101 Firefox/58.0'
+    return "Mozilla/5.0 (Windows NT;) Gecko/20100101 Firefox/58.0"
     # return 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0'
     # TODO: do it
     # return UserAgent().random
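
For reference, a minimal usage sketch of the helpers above, assuming the proxy_py project layout so that async_requests is importable; the URLs and the SOCKS proxy address are illustrative, not part of the commit:

import asyncio

import async_requests


async def main():
    # plain GET; request() fills in a User-Agent header and a 10 second timeout
    response = await async_requests.get("https://example.com")
    print(response.status, len(response.text))

    # POST through a SOCKS proxy; for socks:// URLs request() switches the
    # session to aiosocks' ProxyConnector / ProxyClientRequest
    response = await async_requests.post(
        "https://httpbin.org/post",
        data={"key": "value"},
        proxy="socks5://127.0.0.1:9050",  # illustrative proxy address
    )
    print(response)  # Response.__str__ dumps {"status": ..., "text": ...}


asyncio.get_event_loop().run_until_complete(main())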

check_from_stdin.py (+12, -16)

@@ -1,37 +1,33 @@
 """
 just a helper script for testing proxies
 """
-from proxy_py import settings
-from models import Proxy
-from checkers.base_checker import BaseChecker
-
 import asyncio
-import proxy_utils
-import sys
 import re
+import sys
 
+import proxy_utils
+from checkers.base_checker import BaseChecker
+from models import Proxy
+from proxy_py import settings
 
-proxy_find_regex = \
-    r"([0-9]{1,3})[^0-9]+([0-9]{1,3})[^0-9]+([0-9]{1,3})[^0-9]+([0-9]{1,3})"\
+proxy_find_regex = (
+    r"([0-9]{1,3})[^0-9]+([0-9]{1,3})[^0-9]+([0-9]{1,3})[^0-9]+([0-9]{1,3})"
     r"[^0-9]+([0-9]{1,5})"
+)
 semaphore = asyncio.BoundedSemaphore(settings.NUMBER_OF_CONCURRENT_TASKS)
 tasks = []
 
 
 async def check_task(ip, port):
     async with semaphore:
         for raw_protocol in range(len(Proxy.PROTOCOLS)):
-            proxy_url = '{}://{}:{}'.format(
-                Proxy.PROTOCOLS[raw_protocol],
-                ip,
-                port
-            )
+            proxy_url = "{}://{}:{}".format(Proxy.PROTOCOLS[raw_protocol], ip, port)
             check_result, _ = await proxy_utils.check_proxy(proxy_url)
             if check_result:
                 break
         # if check_result:
         #     print('proxy {} works'.format(proxy_url))
-        print('+' if check_result else '-', end='', file=sys.stderr)
+        print("+" if check_result else "-", end="", file=sys.stderr)
         sys.stderr.flush()
 
 
@@ -42,7 +38,7 @@ async def main():
             groups = re.search(proxy_find_regex, line).groups()
         except:
             continue
-        ip = '.'.join(groups[:4])
+        ip = ".".join(groups[:4])
         port = groups[4]
 
         tasks.append(asyncio.ensure_future(check_task(ip, port)))
@@ -52,5 +48,5 @@ async def main():
     BaseChecker.clean()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     asyncio.get_event_loop().run_until_complete(main())
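
As a quick illustration of what the unchanged proxy_find_regex above matches, a self-contained snippet; the input line is made up:

import re

# regex copied from check_from_stdin.py: four IPv4 octets and a port,
# with arbitrary non-digit separators between them
proxy_find_regex = (
    r"([0-9]{1,3})[^0-9]+([0-9]{1,3})[^0-9]+([0-9]{1,3})[^0-9]+([0-9]{1,3})"
    r"[^0-9]+([0-9]{1,5})"
)

groups = re.search(proxy_find_regex, "host 127.0.0.1 : 8080 (elite)").groups()
ip = ".".join(groups[:4])  # "127.0.0.1"
port = groups[4]           # "8080"
print(ip, port)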

checkers/__init__.py (-1)

@@ -1,4 +1,3 @@
 from checkers.d3d_info_checker import D3DInfoChecker
 from checkers.google_com_checker import GoogleComChecker
 from checkers.ipinfo_io_checker import IPInfoIOChecker
-

checkers/base_checker.py (+13, -7)

@@ -1,11 +1,12 @@
-from aiosocks.connector import ProxyConnector, ProxyClientRequest
-from proxy_py import settings
-
+import asyncio
 import ssl
+
 import aiohttp
 import aiosocks
-import asyncio
+from aiosocks.connector import ProxyClientRequest, ProxyConnector
+
 import async_requests
+from proxy_py import settings
 
 
 class CheckerResult:
@@ -46,7 +47,9 @@ def __init__(self, url=None, request_type="GET", timeout=None):
             limit_per_host=settings.NUMBER_OF_SIMULTANEOUS_REQUESTS_PER_HOST,
         )
         self.request_type = request_type
-        self.timeout = timeout if timeout is not None else settings.PROXY_CHECKING_TIMEOUT
+        self.timeout = (
+            timeout if timeout is not None else settings.PROXY_CHECKING_TIMEOUT
+        )
         self.url = url
 
     @staticmethod
@@ -56,7 +59,6 @@ async def init():
 
         :return:
         """
-        pass
 
     @staticmethod
     def get_aiohttp_connector():
@@ -123,7 +125,11 @@ async def _request(self, proxy_address, timeout) -> tuple:
             connector=conn, connector_owner=False, request_class=ProxyClientRequest
         ) as session:
             async with session.request(
-                self.request_type, self.url, proxy=proxy_address, timeout=timeout, headers=headers
+                self.request_type,
+                self.url,
+                proxy=proxy_address,
+                timeout=timeout,
+                headers=headers,
             ) as response:
                 is_working = await self.validate(response, checker_result)

checkers/d3d_info_checker.py (+8, -4)

@@ -5,7 +5,11 @@
 
 class D3DInfoChecker(BaseChecker):
     def __init__(self, timeout=None):
-        super(D3DInfoChecker, self).__init__("https://test.d3d.info/ok.html", timeout=timeout)
-
-    async def validate(self, response: aiohttp.ClientResponse, checker_result: CheckerResult):
-        return (await response.text()).strip().lower() == 'ok'
+        super(D3DInfoChecker, self).__init__(
+            "https://test.d3d.info/ok.html", timeout=timeout
+        )
+
+    async def validate(
+        self, response: aiohttp.ClientResponse, checker_result: CheckerResult
+    ):
+        return (await response.text()).strip().lower() == "ok"
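
The checkers above and below follow the same pattern, so a new one is mostly boilerplate. A hedged sketch of a hypothetical checker; the class name and target URL are invented, while BaseChecker and CheckerResult come from checkers.base_checker as shown in this commit:

import aiohttp

from checkers.base_checker import BaseChecker, CheckerResult


class ExampleOrgChecker(BaseChecker):
    """Hypothetical checker that fetches a known page through the proxy."""

    def __init__(self, timeout=None):
        super(ExampleOrgChecker, self).__init__(
            "https://example.org/", timeout=timeout
        )

    async def validate(
        self, response: aiohttp.ClientResponse, checker_result: CheckerResult
    ):
        # the request already went through the proxy;
        # treat it as working if the origin answered with HTTP 200
        return response.status == 200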

checkers/google_com_checker.py (+8, -4)

@@ -5,11 +5,15 @@
 
 class GoogleComChecker(BaseChecker):
     def __init__(self, timeout=None):
-        super(GoogleComChecker, self).__init__("https://www.google.com/humans.txt", timeout=timeout)
+        super(GoogleComChecker, self).__init__(
+            "https://www.google.com/humans.txt", timeout=timeout
+        )
 
-    async def validate(self, response: aiohttp.ClientResponse, checker_result: CheckerResult):
-        '''
+    async def validate(
+        self, response: aiohttp.ClientResponse, checker_result: CheckerResult
+    ):
+        """
         We have already done the request and it was successful,
         Google returned something(maybe good response, maybe captcha, we don't care)
-        '''
+        """
         return True

checkers/ipinfo_io_checker.py (+19, -14)

@@ -1,27 +1,32 @@
-from checkers.base_checker import BaseChecker, CheckerResult
 import aiohttp
 
+from checkers.base_checker import BaseChecker, CheckerResult
+
 
 class IPInfoIOChecker(BaseChecker):
     def __init__(self, timeout=None):
         super(IPInfoIOChecker, self).__init__("https://ipinfo.io/json", timeout=timeout)
 
-    async def validate(self, response: aiohttp.ClientResponse, checker_result: CheckerResult) -> bool:
+    async def validate(
+        self, response: aiohttp.ClientResponse, checker_result: CheckerResult
+    ) -> bool:
         if response.status != 200:
             return False
 
         json_result = await response.json()
-        if 'ip' in json_result:
-            checker_result.ipv4 = json_result['ip']
-        if 'city' in json_result:
-            checker_result.city = json_result['city']
-        if 'region' in json_result:
-            checker_result.region = json_result['region']
-        if 'country' in json_result:
-            checker_result.country_code = json_result['country']
-        if 'loc' in json_result:
-            checker_result.location_coordinates = tuple(float(x) for x in json_result['loc'].split(','))
-        if 'org' in json_result:
-            checker_result.organization_name = json_result['org']
+        if "ip" in json_result:
+            checker_result.ipv4 = json_result["ip"]
+        if "city" in json_result:
+            checker_result.city = json_result["city"]
+        if "region" in json_result:
+            checker_result.region = json_result["region"]
+        if "country" in json_result:
+            checker_result.country_code = json_result["country"]
+        if "loc" in json_result:
+            checker_result.location_coordinates = tuple(
+                float(x) for x in json_result["loc"].split(",")
+            )
+        if "org" in json_result:
+            checker_result.organization_name = json_result["org"]
 
         return True
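
For context, validate() above maps a typical ipinfo.io JSON body onto CheckerResult fields; the payload below is illustrative, not taken from the commit:

# illustrative ipinfo.io-style response body
json_result = {
    "ip": "203.0.113.7",
    "city": "Amsterdam",
    "region": "North Holland",
    "country": "NL",
    "loc": "52.3740,4.8897",
    "org": "AS64511 Example ISP",
}

# the same parsing the checker applies to the "loc" field
location_coordinates = tuple(float(x) for x in json_result["loc"].split(","))
print(location_coordinates)  # (52.374, 4.8897)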

collectors/__init__.py (+1, -1)

@@ -1,2 +1,2 @@
 from collectors.abstract_collector import AbstractCollector
-from collectors.pages_collector import PagesCollector
\ No newline at end of file
+from collectors.pages_collector import PagesCollector

collectors/abstract_collector.py (+13, -11)

@@ -1,10 +1,9 @@
 # TODO: add wrapper for doing requests and saving its cookies and UserAgent
 import asyncio
-
-from proxy_py import settings
-
 import json
+
 import models
+from proxy_py import settings
 
 
 class AbstractCollector:
@@ -47,8 +46,9 @@ async def _collect(self):
         """Do not call yourself! It is called on collector's processing automatically"""
         collect = self.collect()
         if asyncio.iscoroutine(collect):
+
             async def wrapper(f):
-                for item in (await f):
+                for item in await f:
                     yield item
 
             collect = wrapper(collect)
@@ -78,10 +78,12 @@ async def load_state(self, state):
         self.last_processing_time = state.last_processing_time
         self.processing_period = state.processing_period
         self.last_processing_proxies_count = state.last_processing_proxies_count
-        self.data = json.loads(state.data) if state.data is not None and state.data else {}
-        if '_variables' in self.data:
-            for var_name in self.data['_variables']:
-                setattr(self, var_name, self.data['_variables'][var_name])
+        self.data = (
+            json.loads(state.data) if state.data is not None and state.data else {}
+        )
+        if "_variables" in self.data:
+            for var_name in self.data["_variables"]:
+                setattr(self, var_name, self.data["_variables"][var_name])
 
     async def save_state(self, state: models.CollectorState):
         """
@@ -93,10 +95,10 @@ async def save_state(self, state: models.CollectorState):
         state.last_processing_proxies_count = self.last_processing_proxies_count
 
         if self.saved_variables is not None:
-            if '_variables' not in self.data:
-                self.data['_variables'] = {}
+            if "_variables" not in self.data:
+                self.data["_variables"] = {}
             for var_name in self.saved_variables:
-                self.data['_variables'][var_name] = getattr(self, var_name)
+                self.data["_variables"][var_name] = getattr(self, var_name)
 
         state.data = json.dumps(self.data)
collectors/pages_collector.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from proxy_py import settings
21
from collectors.abstract_collector import AbstractCollector
2+
from proxy_py import settings
33

44

55
# TODO: save pages to collector state
@@ -17,14 +17,14 @@ class PagesCollector(AbstractCollector):
1717
def __init__(self):
1818
super(PagesCollector, self).__init__()
1919
self.last_proxies_list = []
20-
self.saved_variables.add('current_page')
21-
self.saved_variables.add('pages_count')
22-
self.saved_variables.add('last_proxies_list')
20+
self.saved_variables.add("current_page")
21+
self.saved_variables.add("pages_count")
22+
self.saved_variables.add("last_proxies_list")
2323

2424
async def collect(self):
25-
proxies = list(
26-
await self.process_page(self.current_page)
27-
)[:settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST]
25+
proxies = list(await self.process_page(self.current_page))[
26+
: settings.COLLECTOR_MAXIMUM_NUMBER_OF_PROXIES_PER_REQUEST
27+
]
2828

2929
if self.dynamic_pages_count:
3030
if proxies:
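
A hedged sketch of how a concrete page-based collector would plug into PagesCollector as changed above; the collector name, URL, and parsing are hypothetical, while current_page, pages_count, and collect() come from the class itself:

import async_requests
from collectors.pages_collector import PagesCollector


class ExampleNetCollector(PagesCollector):
    """Hypothetical collector for a made-up paginated proxy list."""

    def __init__(self):
        super(ExampleNetCollector, self).__init__()
        self.pages_count = 10  # assumed page count; may be adjusted dynamically

    async def process_page(self, page_index):
        # fetch one page and return an iterable of "ip:port" strings;
        # collect() slices the result and handles the page bookkeeping
        response = await async_requests.get(
            "https://proxy-list.example.net/page/{}".format(page_index)
        )
        return [line.strip() for line in response.text.splitlines() if ":" in line]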
