Skip to content

Commit 105b6f2

Browse files
committed
scrape.do api handler example.
1 parent e3b212c commit 105b6f2

File tree

6 files changed

+294
-1
lines changed

6 files changed

+294
-1
lines changed

README.md

+69-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,69 @@
1-
# python-sample
1+
# python-sample
2+
3+
<p align="center">
4+
<img src="https://scrape.do/images/logo.png" alt="scrape.do" width="200">
5+
</p>
6+
7+
Web Scraper Api
8+
- Best Rotating Proxy & [Scraping API](https://scrape.do/) Alternative.
9+
- :warning: Please read the [documents](https://docs.scrape.do/) first.
10+
11+
#### You can send requests to any webpage through the proxy gateway & web API provided by scrape.do. As the example shows, this takes only a few lines of code
12+
13+
#### You can see Example here ([test.py](/test.py))
14+
15+
## Usage
16+
17+
First, create the `python_sample` module as shown in [python-sample](/python_sample_scrape_do/__init__.py)
18+
19+
and then use the `python_sample` class to call the scrape.do APIs:
20+
21+
from python_sample_scrape_do import Scrape_do_Exception, python_sample
22+
23+
API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"
24+
25+
# create a python-sample object
26+
sample = python_sample()
27+
28+
# set the scrape.do api key
29+
sample.set_api_token(api_token=API_TOKEN)
30+
31+
# Get Scrape.do account statistics
32+
try:
33+
resp = sample.account_status()
34+
print("Response Type " + str(type(resp)))
35+
print(resp)
36+
except ConnectionError as e:
37+
print(str(e))
38+
print(traceback.format_exc())
39+
40+
except Scrape_do_Exception as e:
41+
print(str(e))
42+
print(traceback.format_exc())
43+
44+
try:
45+
resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
46+
render=False, super_proxies=False, geo_code=None)
47+
print(resp)
48+
except ConnectionError as e:
49+
print(str(e))
50+
print(traceback.format_exc())
51+
52+
except Scrape_do_Exception as e:
53+
print(str(e))
54+
print(traceback.format_exc())
55+
56+
## Author
57+
58+
[Batuhan Özyön](https://github.com/bynf) <br/>
59+
[Sameer Narkhede](https://github.com/narkhedesam)
60+
61+
62+
### Screenshot
63+
<br/>
64+
65+
![python-sample](python_sample.png)
66+
67+
<br/><br/>
68+
69+

python_sample.png

126 KB
Loading

python_sample_scrape_do/__init__.py

+169
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""
2+
Date: 29-08-2020
3+
Created by Sameer Narkhede
4+
Project : python_sample
5+
Module : python_sample_scrape_do
6+
"""
7+
import traceback
8+
import requests
9+
10+
11+
class Scrape_do_Exception(Exception):
    """Custom exception raised for scrape.do API errors.

    Derives from ``Exception`` rather than ``BaseException``: user-defined
    errors should inherit ``Exception`` so they are caught by ordinary
    ``except Exception`` handlers and do not sit alongside interpreter-exit
    signals such as ``KeyboardInterrupt`` and ``SystemExit``.
    """
    pass
16+
17+
18+
class python_sample:
    """
    Python sample class for the scrape.do rotating-proxy / scraping API
    (https://scrape.do).

    Usage: construct, call :meth:`set_api_token`, then use
    :meth:`account_status` and :meth:`create_request_url`.
    """

    def __init__(self):
        # Token must be configured via set_api_token() before any API call.
        self.scrape_do_api_token = None

    def set_api_token(self, api_token=None):
        """
        Set the scrape.do api token.  You can find this token at
        https://scrape.do/dashboard (requires login).

        :param api_token: String API_TOKEN from https://scrape.do
        :return: None
        """
        self.scrape_do_api_token = api_token

    def account_status(self):
        """
        Return the statistics of your scrape.do account.

        :return: Dictionary of statistics (parsed JSON response)
        :raises Scrape_do_Exception: if the api token is not configured
        """
        # Guard clause: fail fast when no token has been configured.
        if not self.scrape_do_api_token:
            raise Scrape_do_Exception("api-token is not configured")

        response = requests.get("http://api.scrape.do/info?token=" + self.scrape_do_api_token)
        return response.json()

    def create_request_url(self, url, method="GET", payload=None, headers=None, render=False,
                           super_proxies=False, geo_code=None):
        """
        Scrape *url* through the scrape.do api handler
        (Best Rotating Proxy & Scraping API Alternative, https://scrape.do/).

        :param url: String url the user needs to scrape.
                    Ex. 'https://httpbin.org/get'
        :param method: String HTTP method: ``GET``, ``OPTIONS``, ``HEAD``,
                       ``POST``, ``PUT``, ``PATCH``, or ``DELETE``
        :param payload: (optional) Dictionary, list of tuples, bytes, or
                        file-like object to send in the request body
        :param headers: (optional) Dictionary of HTTP Headers to forward
                        with the request
        :param render: (optional) Boolean - set to True to execute
                       Javascript.  ** Requires a business plan.
        :param super_proxies: (optional) Boolean - set to True to use the
                       Super Proxies.  ** Requires a business plan.
        :param geo_code: (optional) one of 'us', 'gb', 'ca', 'tr', 'cn',
                       'ru', 'se', 'de', 'fr', 'es', 'br'.
                       ** Requires a Pro plan.
        :return: response body as UTF-8 encoded bytes on HTTP 200; None for
                 any unhandled status code (kept for backward compatibility)
        :raises Scrape_do_Exception: on invalid arguments or api errors
        """
        # Guard clause: fail fast when no token has been configured.
        if not self.scrape_do_api_token:
            raise Scrape_do_Exception("api-token is not configured")

        base_url = "http://api.scrape.do"
        params = {'token': self.scrape_do_api_token}

        if headers is None:
            headers = {}
        if payload is None:
            payload = {}

        # BUG FIX: the original tested ``headers is not {}`` -- an identity
        # comparison against a fresh dict literal, which is always True --
        # so customHeaders was enabled even for empty header dicts.  Only
        # enable it when the caller actually supplied headers.
        if headers:
            params['customHeaders'] = 'true'

        params['url'] = url

        if render:
            params['render'] = 'true'   # Javascript rendering (business plan)

        if super_proxies:
            params['super'] = 'true'    # super proxies (business plan)

        if geo_code:
            geocodes = ['us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de', 'fr', 'es', 'br']
            if geo_code not in geocodes:
                raise Scrape_do_Exception(
                    "Geo-Code is not valid. please provide geo-code in " + str(geocodes))
            # NOTE(review): sent as 'geo_code' as in the original; the
            # scrape.do docs may expect 'geoCode' -- verify against the api.
            params['geo_code'] = geo_code

        methods = ["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
        if method not in methods:
            raise Scrape_do_Exception("method is not valid. please provide method in " + str(methods))

        response = requests.request(method, base_url, params=params, headers=headers, data=payload)

        print("status_code:" + str(response.status_code))

        if response.status_code == 200:
            return response.text.encode('utf8')
        elif response.status_code == 404:
            raise Scrape_do_Exception("Target url not found :: Pass valid URL")
        elif response.status_code == 429:
            raise Scrape_do_Exception("You are sending too many concurrent request :: Please upgrade your "
                                      "plan or contact with sale.")
        elif response.status_code == 401:
            raise Scrape_do_Exception("You have not credit :: Please upgrade your plan or contact with sale.")
        elif response.status_code == 502:
            raise Scrape_do_Exception("Gateway Error :: Please retry and check response. If you live "
                                      "constantly, contact [email protected]")
        # NOTE(review): any other status code falls through and returns
        # None, matching the original behaviour.
134+
135+
136+
if __name__ == '__main__':

    API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"

    # Build a python-sample helper instance for the demo run.
    sample = python_sample()

    # Set the scrape.do api key (call left commented out in this demo, so
    # the calls below exercise the error paths).
    # sample.set_api_token(api_token=API_TOKEN)

    # Fetch the scrape.do account statistics.
    try:
        status = sample.account_status()
        print("Response Type " + str(type(status)))
        print(status)
    except (ConnectionError, Scrape_do_Exception) as error:
        print(str(error))
        print(traceback.format_exc())

    # Issue a scraping request through the api.
    try:
        body = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
                                         render=False, super_proxies=False, geo_code=None)
        print(body)
    except (ConnectionError, Scrape_do_Exception) as error:
        print(str(error))
        print(traceback.format_exc())

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests

setup.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from setuptools import setup

# Packaging metadata for the python_sample_scrape_do helper package.
# Installs the single package directory; the only runtime dependency
# (requests) is tracked in requirements.txt rather than install_requires.
setup(
    name='python_sample_scrape_do',
    version='1.0',
    packages=['python_sample_scrape_do'],
    url='',
    license='',
    author='Sameer Narkhede',
    author_email='[email protected]',
    description='Scrape.do python sample repository you can directly copy and paste to your project'
)

test.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""
2+
Date: 29-08-2020
3+
Created by Sameer Narkhede
4+
Project : python_sample
5+
"""
6+
import traceback
7+
8+
from python_sample_scrape_do import Scrape_do_Exception, python_sample
9+
10+
11+
API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"
12+
13+
# create an web scrapper object
14+
sample = python_sample()
15+
16+
# set the scrape.do api key
17+
sample.set_api_token(api_token=API_TOKEN)
18+
19+
# Get Scrape.do account statistics
20+
try:
21+
resp = sample.account_status()
22+
print("Response Type " + str(type(resp)))
23+
print(resp)
24+
except ConnectionError as e:
25+
print(str(e))
26+
print(traceback.format_exc())
27+
28+
except Scrape_do_Exception as e:
29+
print(str(e))
30+
print(traceback.format_exc())
31+
32+
try:
33+
resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
34+
render=False, super_proxies=False, geo_code=None)
35+
print(resp)
36+
except ConnectionError as e:
37+
print(str(e))
38+
print(traceback.format_exc())
39+
40+
except Scrape_do_Exception as e:
41+
print(str(e))
42+
print(traceback.format_exc())
43+

0 commit comments

Comments
 (0)