Skip to content
This repository was archived by the owner on Jun 10, 2024. It is now read-only.

Commit e9cda9a

Browse files
committed
improve couchdb: allow empty username and password
1 parent 9bae587 commit e9cda9a

File tree

7 files changed

+51
-109
lines changed

7 files changed

+51
-109
lines changed

pyspider/database/__init__.py

+2-20
Original file line numberDiff line numberDiff line change
@@ -214,26 +214,8 @@ def _connect_couchdb(parsed, dbtype, url):
214214
params = {}
215215

216216
# default to env, then url; there is no hard-coded fallback, so either value may be empty/None
217-
params['username'] = os.environ.get('COUCHDB_USER') or parsed.username or 'user'
218-
params['password'] = os.environ.get('COUCHDB_PASSWORD') or parsed.password or 'password'
219-
220-
# create necessary DBs + the admin user
221-
res = requests.put(url + "_users")
222-
if 'error' in res and res['error'] == 'unauthorized':
223-
# user is already created. This will happen if CouchDB is running in docker
224-
# and COUCHDB_USER and COUCHDB_PASSWORD are set
225-
from requests.auth import HTTPBasicAuth
226-
requests.put(url + "_users",
227-
auth=HTTPBasicAuth(params['username'], params['password']))
228-
requests.put(url + "_replicator",
229-
auth=HTTPBasicAuth(params['username'], params['password']))
230-
requests.put(url + '_node/_local/_config/admins/' + params['username'],
231-
data=params['password'],
232-
auth=HTTPBasicAuth(params['username'], params['password']))
233-
else:
234-
requests.put(url + "_replicator")
235-
requests.put(url + '_node/_local/_config/admins/' + params['username'],
236-
data=params['password'])
217+
params['username'] = os.environ.get('COUCHDB_USER') or parsed.username
218+
params['password'] = os.environ.get('COUCHDB_PASSWORD') or parsed.password
237219

238220
if dbtype == 'taskdb':
239221
from .couchdb.taskdb import TaskDB

pyspider/database/couchdb/couchdbbase.py

+13-25
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44
class SplitTableMixin(object):
55
UPDATE_PROJECTS_TIME = 10 * 60
66

7+
def __init__(self):
8+
self.session = requests.session()
9+
if self.username:
10+
self.session.auth = HTTPBasicAuth(self.username, self.password)
11+
self.session.headers.update({'Content-Type': 'application/json'})
12+
713
def _collection_name(self, project):
814
if self.collection_prefix:
915
return "%s_%s" % (self.collection_prefix, project)
@@ -32,10 +38,7 @@ def _list_project(self):
3238
prefix = ''
3339

3440
url = self.base_url + "_all_dbs"
35-
res = requests.get(url,
36-
data=json.dumps({}),
37-
headers={"Content-Type": "application/json"},
38-
auth=HTTPBasicAuth(self.username, self.password)).json()
41+
res = self.session.get(url, json={}).json()
3942
for each in res:
4043
if each.startswith('_'):
4144
continue
@@ -45,19 +48,15 @@ def _list_project(self):
4548

4649
def create_database(self, name):
4750
url = self.base_url + name
48-
res = requests.put(url,
49-
headers={"Content-Type": "application/json"},
50-
auth=HTTPBasicAuth(self.username, self.password)).json()
51+
res = self.session.put(url).json()
5152
if 'error' in res and res['error'] == 'unauthorized':
5253
raise Exception("Supplied credentials are incorrect. Reason: {} for User: {} Password: {}".format(res['reason'], self.username, self.password))
5354
return res
5455

5556

5657
def get_doc(self, db_name, doc_id):
5758
url = self.base_url + db_name + "/" + doc_id
58-
res = requests.get(url,
59-
headers={"Content-Type": "application/json"},
60-
auth=HTTPBasicAuth(self.username, self.password)).json()
59+
res = self.session.get(url).json()
6160
if "error" in res and res["error"] == "not_found":
6261
return None
6362
return res
@@ -66,10 +65,7 @@ def get_doc(self, db_name, doc_id):
6665
def get_docs(self, db_name, selector):
6766
url = self.base_url + db_name + "/_find"
6867
selector['use_index'] = self.index
69-
res = requests.post(url,
70-
data=json.dumps(selector),
71-
headers={"Content-Type": "application/json"},
72-
auth=HTTPBasicAuth(self.username, self.password)).json()
68+
res = self.session.post(url, json=selector).json()
7369
if 'error' in res and res['error'] == 'not_found':
7470
return []
7571
return res['docs']
@@ -81,10 +77,7 @@ def get_all_docs(self, db_name):
8177

8278
def insert_doc(self, db_name, doc_id, doc):
8379
url = self.base_url + db_name + "/" + doc_id
84-
return requests.put(url,
85-
data=json.dumps(doc),
86-
headers={"Content-Type": "application/json"},
87-
auth=HTTPBasicAuth(self.username, self.password)).json()
80+
return self.session.put(url, json=doc).json()
8881

8982

9083
def update_doc(self, db_name, doc_id, new_doc):
@@ -94,14 +87,9 @@ def update_doc(self, db_name, doc_id, new_doc):
9487
for key in new_doc:
9588
doc[key] = new_doc[key]
9689
url = self.base_url + db_name + "/" + doc_id
97-
return requests.put(url,
98-
data=json.dumps(doc),
99-
headers={"Content-Type": "application/json"},
100-
auth=HTTPBasicAuth(self.username, self.password)).json()
90+
return self.session.put(url, json=doc).json()
10191

10292

10393
def delete(self, url):
104-
return requests.delete(url,
105-
headers={"Content-Type": "application/json"},
106-
auth=HTTPBasicAuth(self.username, self.password)).json()
94+
return self.session.delete(url).json()
10795

pyspider/database/couchdb/projectdb.py

+13-28
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,19 @@
66
class ProjectDB(BaseProjectDB):
77
__collection_name__ = 'projectdb'
88

9-
def __init__(self, url, database='projectdb', username='username', password='password'):
9+
def __init__(self, url, database='projectdb', username=None, password=None):
1010
self.username = username
1111
self.password = password
1212
self.url = url + self.__collection_name__ + "_" + database + "/"
1313
self.database = database
14-
self.insert('', {})
14+
15+
self.session = requests.session()
16+
if username:
17+
self.session.auth = HTTPBasicAuth(self.username, self.password)
18+
self.session.headers.update({'Content-Type': 'application/json'})
1519

1620
# Create the db
17-
res = requests.put(self.url,
18-
headers={"Content-Type": "application/json"},
19-
auth=HTTPBasicAuth(self.username, self.password)).json()
21+
res = self.session.put(self.url).json()
2022
if 'error' in res and res['error'] == 'unauthorized':
2123
raise Exception(
2224
"Supplied credentials are incorrect. Reason: {} for User: {} Password: {}".format(res['reason'],
@@ -29,9 +31,7 @@ def __init__(self, url, database='projectdb', username='username', password='pas
2931
},
3032
'name': self.__collection_name__ + "_" + database
3133
}
32-
res = requests.post(self.url+"_index", data=json.dumps(payload),
33-
headers={"Content-Type": "application/json"},
34-
auth=HTTPBasicAuth(self.username, self.password)).json()
34+
res = self.session.post(self.url+"_index", json=payload).json()
3535
self.index = res['id']
3636

3737
def _default_fields(self, each):
@@ -51,10 +51,7 @@ def insert(self, name, obj={}):
5151
obj = dict(obj)
5252
obj['name'] = name
5353
obj['updatetime'] = time.time()
54-
res = requests.put(url,
55-
data = json.dumps(obj),
56-
headers = {"Content-Type": "application/json"},
57-
auth=HTTPBasicAuth(self.username, self.password)).json()
54+
res = self.session.put(url, json=obj).json()
5855
return res
5956

6057
def update(self, name, obj={}, **kwargs):
@@ -78,10 +75,7 @@ def get_all(self, fields=None):
7875
"use_index": self.index
7976
}
8077
url = self.url + "_find"
81-
res = requests.post(url,
82-
data=json.dumps(payload),
83-
headers={"Content-Type": "application/json"},
84-
auth=HTTPBasicAuth(self.username, self.password)).json()
78+
res = self.session.post(url, json=payload).json()
8579
for doc in res['docs']:
8680
yield self._default_fields(doc)
8781

@@ -95,10 +89,7 @@ def get(self, name, fields=None):
9589
"use_index": self.index
9690
}
9791
url = self.url + "_find"
98-
res = requests.post(url,
99-
data=json.dumps(payload),
100-
headers={"Content-Type": "application/json"},
101-
auth=HTTPBasicAuth(self.username, self.password)).json()
92+
res = self.session.post(url, json=payload).json()
10293
if len(res['docs']) == 0:
10394
return None
10495
return self._default_fields(res['docs'][0])
@@ -115,13 +106,7 @@ def drop(self, name):
115106
doc = self.get(name)
116107
payload = {"rev": doc["_rev"]}
117108
url = self.url + name
118-
return requests.delete(url,
119-
params=payload,
120-
headers={"Content-Type": "application/json"},
121-
auth=HTTPBasicAuth(self.username, self.password)).json()
109+
return self.session.delete(url, params=payload).json()
122110

123111
def drop_database(self):
124-
return requests.delete(self.url,
125-
headers={"Content-Type": "application/json"},
126-
auth=HTTPBasicAuth(self.username, self.password)).json()
127-
112+
return self.session.delete(self.url).json()

pyspider/database/couchdb/resultdb.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
import time, json, requests
2-
from requests.auth import HTTPBasicAuth
1+
import time, json
32
from pyspider.database.base.resultdb import ResultDB as BaseResultDB
43
from .couchdbbase import SplitTableMixin
54

65

76
class ResultDB(SplitTableMixin, BaseResultDB):
87
collection_prefix = ''
98

10-
def __init__(self, url, database='resultdb', username='username', password='password'):
9+
def __init__(self, url, database='resultdb', username=None, password=None):
1110
self.username = username
1211
self.password = password
13-
1412
self.base_url = url
1513
self.url = url + database + "/"
1614
self.database = database
15+
16+
super().__init__()
1717
self.create_database(database)
1818
self.index = None
1919

@@ -31,10 +31,7 @@ def _create_project(self, project):
3131
'name': collection_name
3232
}
3333

34-
res = requests.post(self.base_url + collection_name + "/_index",
35-
data=json.dumps(payload),
36-
headers={"Content-Type": "application/json"},
37-
auth=HTTPBasicAuth(self.username, self.password)).json()
34+
res = self.session.post(self.base_url + collection_name + "/_index", json=payload).json()
3835
self.index = res['id']
3936
self._list_project()
4037

pyspider/database/couchdb/taskdb.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
1-
import json, time, requests
2-
from requests.auth import HTTPBasicAuth
1+
import json, time
32
from pyspider.database.base.taskdb import TaskDB as BaseTaskDB
43
from .couchdbbase import SplitTableMixin
54

65

76
class TaskDB(SplitTableMixin, BaseTaskDB):
87
collection_prefix = ''
98

10-
def __init__(self, url, database='taskdb', username='username', password='password'):
9+
def __init__(self, url, database='taskdb', username=None, password=None):
1110
self.username = username
1211
self.password = password
1312
self.base_url = url
1413
self.url = url + database + "/"
1514
self.database = database
16-
self.create_database(database)
1715
self.index = None
1816

17+
super().__init__()
18+
19+
self.create_database(database)
1920
self.projects = set()
2021
self._list_project()
2122

@@ -32,10 +33,7 @@ def _create_project(self, project):
3233
},
3334
'name': collection_name
3435
}
35-
res = requests.post(self.base_url + collection_name + "/_index",
36-
data=json.dumps(payload),
37-
headers={"Content-Type": "application/json"},
38-
auth=HTTPBasicAuth(self.username, self.password)).json()
36+
res = self.session.post(self.base_url + collection_name + "/_index", json=payload).json()
3937
self.index = res['id']
4038
self._list_project()
4139

requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
Flask==0.10
22
Jinja2==2.7
3-
chardet==2.2.1
3+
chardet==3.0.4
44
cssselect==0.9
55
lxml==4.3.3
66
pycurl==7.43.0.3
77
pyquery==1.4.0
8-
requests==2.2
8+
requests==2.24.0
99
tornado==4.5.3
1010
mysql-connector-python==8.0.16
1111
pika==1.1.0

setup.py

+10-18
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,33 @@
2020
install_requires = [
2121
'Flask==0.10',
2222
'Jinja2==2.7',
23-
'chardet==2.2.1',
23+
'chardet==3.0.4',
2424
'cssselect==0.9',
2525
"lxml==4.3.3",
2626
'pycurl==7.43.0.3',
27-
'requests==2.2',
27+
'requests==2.24.0',
2828
'Flask-Login==0.2.11',
2929
'u-msgpack-python==1.6',
3030
'click==3.3',
3131
'six==1.10.0',
32-
'tblib==1.4.0'
32+
'tblib==1.4.0',
33+
'wsgidav==2.3.0',
34+
'tornado>=3.2,<=4.5.3',
35+
'pyquery',
3336
]
3437

35-
if sys.version_info >= (3, 0): # 3.*
36-
install_requires.extend([
37-
'wsgidav==2.3.0',
38-
'tornado>=3.2,<=4.5.3',
39-
'pyquery',
40-
])
41-
4238
extras_require_all = [
4339
'mysql-connector-python==8.0.16',
4440
'pymongo==3.9.0',
4541
'redis==2.10.6',
4642
'redis-py-cluster==1.3.6',
4743
'psycopg2==2.8.2',
4844
'elasticsearch==2.3.0',
45+
'kombu==4.4.0',
46+
'amqp==2.4.0',
47+
'SQLAlchemy==1.3.10',
48+
'pika==1.1.0'
4949
]
50-
if sys.version_info >= (3, 0): # 3.*
51-
extras_require_all.extend([
52-
'kombu==4.4.0',
53-
'amqp==2.4.0',
54-
'SQLAlchemy==1.3.10',
55-
'pika==1.1.0'
56-
])
57-
5850

5951
setup(
6052
name='pyspider',

0 commit comments

Comments
 (0)