Skip to content

Commit 15d0fcf

Browse files
committed
feature/added-v2-integration: removed return_text param, introduced post/put/delete request methods
1 parent e4060e0 commit 15d0fcf

File tree

10 files changed

+146
-79
lines changed

10 files changed

+146
-79
lines changed

.github/workflows/code_checks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
code_checks:
77
strategy:
88
matrix:
9-
pyver: [ '3.6', '3.7', '3.8', '3.9', '3.10' ]
9+
pyver: ['3.7', '3.8', '3.9', '3.10', '3.11']
1010
os: [ ubuntu, macos, windows ]
1111
fail-fast: true
1212
runs-on: ${{ matrix.os }}-latest

README.md

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ python 3.6+.
2323
from scrapingant_client import ScrapingAntClient
2424

2525
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
26-
# Scrape the example.com site.
26+
# Scrape the example.com site
2727
result = client.general_request('https://example.com')
2828
print(result.content)
2929
```
@@ -62,17 +62,19 @@ Main class of this library.
6262

6363
https://docs.scrapingant.com/request-response-format#available-parameters
6464

65-
| Param | Type | Default |
66-
|-------------------|-----------------------------------|------------|
67-
| url | <code>string</code> | |
68-
| cookies | <code>List[Cookie]</code> | None |
69-
| headers | <code>List[Dict[str, str]]</code> | None |
70-
| js_snippet | <code>string</code> | None |
71-
| proxy_type | <code>ProxyType</code> | datacenter |
72-
| proxy_country | <code>str</code> | None |
73-
| return_text | <code>boolean</code> | False |
74-
| wait_for_selector | <code>str</code> | None |
75-
| browser | <code>boolean</code> | True |
65+
| Param | Type | Default |
66+
|-------------------|----------------------------------------------------------------------------------------------------------------------------|------------|
67+
| url | <code>string</code> | |
68+
| method | <code>string</code> | GET |
69+
| cookies | <code>List[Cookie]</code> | None |
70+
| headers | <code>List[Dict[str, str]]</code> | None |
71+
| js_snippet | <code>string</code> | None |
72+
| proxy_type | <code>ProxyType</code> | datacenter |
73+
| proxy_country | <code>str</code> | None |
74+
| wait_for_selector | <code>str</code> | None |
75+
| browser | <code>boolean</code> | True |
76+
| data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
77+
| json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
7678

7779
**IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.
7880

@@ -82,7 +84,7 @@ https://docs.scrapingant.com/request-response-format#available-parameters
8284

8385
Class defining cookie. Currently it supports only name and value
8486

85-
| Param | Type |
87+
| Param | Type |
8688
|-------|---------------------|
8789
| name | <code>string</code> |
8890
| value | <code>string</code> |
@@ -92,12 +94,12 @@ Class defining cookie. Currently it supports only name and value
9294
#### Response
9395

9496
Class defining response from API.
95-
96-
| Param | Type |
97-
|-------------|---------------------------|
98-
| content | <code>string</code> |
99-
| cookies | <code>List[Cookie]</code> |
100-
| status_code | <code>int</code> |
97+
| Param | Type |
98+
|-------------|----------------------------|
99+
| content | <code>string</code> |
100+
| cookies | <code>List[Cookie]</code> |
101+
| status_code | <code>int</code> |
102+
| text | <code>string</code> |
101103

102104
## Exceptions
103105

@@ -231,14 +233,38 @@ client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
231233

232234

233235
async def main():
234-
# Scrape the example.com site.
236+
# Scrape the example.com site
235237
result = await client.general_request_async('https://example.com')
236238
print(result.content)
237239

238240

239241
asyncio.run(main())
240242
```
241243

244+
### Sending POST request
245+
246+
```python3
247+
from scrapingant_client import ScrapingAntClient
248+
249+
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
250+
251+
# Sending POST request with json data
252+
result = client.general_request(
253+
url="https://httpbin.org/post",
254+
method="POST",
255+
json={"test": "test"},
256+
)
257+
print(result.content)
258+
259+
# Sending POST request with bytes data
260+
result = client.general_request(
261+
url="https://httpbin.org/post",
262+
method="POST",
263+
data=b'test_bytes',
264+
)
265+
print(result.content)
266+
```
267+
242268
## Useful links
243269

244270
- [Scrapingant API doumentation](https://docs.scrapingant.com)

scrapingant_client/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.0.1"
1+
__version__ = "2.0.0"
22

33
from scrapingant_client.client import ScrapingAntClient
44
from scrapingant_client.cookie import Cookie

scrapingant_client/client.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ def _form_payload(
3939
js_snippet: Optional[str] = None,
4040
proxy_type: ProxyType = ProxyType.datacenter,
4141
proxy_country: Optional[str] = None,
42-
return_text: bool = False,
4342
wait_for_selector: Optional[str] = None,
4443
browser: bool = True,
4544
) -> Dict:
@@ -54,7 +53,6 @@ def _form_payload(
5453
request_data['proxy_country'] = proxy_country.lower()
5554
if wait_for_selector is not None:
5655
request_data['wait_for_selector'] = wait_for_selector
57-
request_data['return_text'] = return_text
5856
request_data['browser'] = browser
5957
return request_data
6058

@@ -69,44 +67,49 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
6967
raise ScrapingantDetectedException()
7068
elif response_status_code == 500:
7169
raise ScrapingantInternalException()
72-
content = response_data['content']
70+
content = response_data['html']
7371
cookies_string = response_data['cookies']
72+
text = response_data['text']
7473
status_code = response_data['status_code']
7574
cookies_list = cookies_list_from_string(cookies_string)
7675
return Response(
7776
content=content,
7877
cookies=cookies_list,
78+
text=text,
7979
status_code=status_code
8080
)
8181

8282
def general_request(
8383
self,
8484
url: str,
85+
method: str = 'GET',
8586
cookies: Optional[List[Cookie]] = None,
8687
headers: Optional[Dict[str, str]] = None,
8788
js_snippet: Optional[str] = None,
8889
proxy_type: ProxyType = ProxyType.datacenter,
8990
proxy_country: Optional[str] = None,
90-
return_text: bool = False,
9191
wait_for_selector: Optional[str] = None,
9292
browser: bool = True,
93+
data=None,
94+
json=None,
9395
) -> Response:
9496
request_data = self._form_payload(
9597
url=url,
9698
cookies=cookies,
9799
js_snippet=js_snippet,
98100
proxy_type=proxy_type,
99101
proxy_country=proxy_country,
100-
return_text=return_text,
101102
wait_for_selector=wait_for_selector,
102103
browser=browser,
103104
)
104105
try:
105-
response = self.requests_session.post(
106-
SCRAPINGANT_API_BASE_URL + '/general',
107-
json=request_data,
106+
response = self.requests_session.request(
107+
method=method,
108+
url=SCRAPINGANT_API_BASE_URL + '/extended',
109+
params=request_data,
108110
headers=convert_headers(headers),
109-
timeout=TIMEOUT_SECONDS
111+
data=data,
112+
json=json,
110113
)
111114
except requests.exceptions.Timeout:
112115
raise ScrapingantTimeoutException()
@@ -118,14 +121,16 @@ def general_request(
118121
async def general_request_async(
119122
self,
120123
url: str,
124+
method: str = 'GET',
121125
cookies: Optional[List[Cookie]] = None,
122126
headers: Optional[Dict[str, str]] = None,
123127
js_snippet: Optional[str] = None,
124128
proxy_type: ProxyType = ProxyType.datacenter,
125129
proxy_country: Optional[str] = None,
126-
return_text: bool = False,
127130
wait_for_selector: Optional[str] = None,
128131
browser: bool = True,
132+
data=None,
133+
json=None,
129134
) -> Response:
130135
import httpx
131136

@@ -135,7 +140,6 @@ async def general_request_async(
135140
js_snippet=js_snippet,
136141
proxy_type=proxy_type,
137142
proxy_country=proxy_country,
138-
return_text=return_text,
139143
wait_for_selector=wait_for_selector,
140144
browser=browser,
141145
)
@@ -147,14 +151,16 @@ async def general_request_async(
147151
timeout=TIMEOUT_SECONDS,
148152
) as client:
149153
try:
150-
response = await client.post(
151-
SCRAPINGANT_API_BASE_URL + '/general',
152-
json=request_data,
154+
response = await client.request(
155+
method=method,
156+
url=SCRAPINGANT_API_BASE_URL + '/extended',
157+
params=request_data,
153158
headers=convert_headers(headers),
159+
data=data,
160+
json=json,
154161
)
155162
except httpx.TimeoutException:
156163
raise ScrapingantTimeoutException()
157-
158164
response_status_code = response.status_code
159165
response_data = response.json()
160166
parsed_response: Response = self._parse_response(response_status_code, response_data, url)

scrapingant_client/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
SCRAPINGANT_API_BASE_URL = 'https://api.scrapingant.com/v1'
1+
SCRAPINGANT_API_BASE_URL = 'https://api.scrapingant.com/v2'
22
TIMEOUT_SECONDS = 120

scrapingant_client/proxy_type.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@
44
class ProxyType(str, Enum):
55
datacenter = 'datacenter'
66
residential = 'residential'
7+
8+
def __str__(self):
9+
return self.value

scrapingant_client/response.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55

66
class Response:
7-
def __init__(self, content: str, cookies: List[Cookie], status_code: Optional[int]):
7+
def __init__(self, content: str, cookies: List[Cookie], text: str, status_code: Optional[int]):
88
self.content = content
99
self.cookies = cookies
10+
self.text = text
1011
self.status_code = status_code

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
"License :: OSI Approved :: Apache Software License",
3535
],
3636
keywords="scrapingant api scraper scraping",
37-
python_requires='~=3.5',
37+
python_requires='~=3.7',
3838
install_requires=['requests>=2,<3'],
3939
extras_require={
4040
'dev': [

tests/test_exceptions.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
@responses.activate
1818
def test_invalid_token():
19-
responses.add(responses.POST, SCRAPINGANT_API_BASE_URL + '/general',
19+
responses.add(responses.GET, SCRAPINGANT_API_BASE_URL + '/extended',
2020
json={'detail': 'wrong token'}, status=403)
2121
client = ScrapingAntClient(token='invalid_token')
2222
with pytest.raises(ScrapingantInvalidTokenException):
@@ -25,7 +25,7 @@ def test_invalid_token():
2525

2626
@responses.activate
2727
def test_invalid_input():
28-
responses.add(responses.POST, SCRAPINGANT_API_BASE_URL + '/general',
28+
responses.add(responses.GET, SCRAPINGANT_API_BASE_URL + '/extended',
2929
json={'detail': 'wrong url'}, status=422)
3030
client = ScrapingAntClient(token='some_token')
3131
with pytest.raises(ScrapingantInvalidInputException) as e:
@@ -35,7 +35,7 @@ def test_invalid_input():
3535

3636
@responses.activate
3737
def test_internal_server_error():
38-
responses.add(responses.POST, SCRAPINGANT_API_BASE_URL + '/general',
38+
responses.add(responses.GET, SCRAPINGANT_API_BASE_URL + '/extended',
3939
json={}, status=500)
4040
client = ScrapingAntClient(token='some_token')
4141
with pytest.raises(ScrapingantInternalException):
@@ -44,7 +44,7 @@ def test_internal_server_error():
4444

4545
@responses.activate
4646
def test_not_reachable():
47-
responses.add(responses.POST, SCRAPINGANT_API_BASE_URL + '/general',
47+
responses.add(responses.GET, SCRAPINGANT_API_BASE_URL + '/extended',
4848
json={}, status=404)
4949
client = ScrapingAntClient(token='some_token')
5050
with pytest.raises(ScrapingantSiteNotReachableException) as e:
@@ -54,7 +54,7 @@ def test_not_reachable():
5454

5555
@responses.activate
5656
def test_detected():
57-
responses.add(responses.POST, SCRAPINGANT_API_BASE_URL + '/general',
57+
responses.add(responses.GET, SCRAPINGANT_API_BASE_URL + '/extended',
5858
json={}, status=423)
5959
client = ScrapingAntClient(token='some_token')
6060
with pytest.raises(ScrapingantDetectedException) as e:

0 commit comments

Comments
 (0)