Skip to content

Commit 0ce71dd

Browse files
authored
restaurant initial commit
1 parent 0022727 commit 0ce71dd

File tree

1 file changed

+228
-0
lines changed

1 file changed

+228
-0
lines changed

db_restaurants.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
# PyMongo Tutorial : Insert, Read, Update, Delete in MongoDB
2+
3+
# !/usr/bin/env python3.5
4+
# -*- coding: UTF-8 -*-
5+
6+
import pprint
7+
import dateutil.parser
8+
import datetime
9+
10+
try:
11+
from pymongo import MongoClient
12+
except ImportError:
13+
raise ImportError('PyMongo is not installed in your machine.')
14+
15+
# Connection setup.
#
# Calling MongoClient() with no arguments connects on the default host and
# port:
#
#     client = MongoClient()
#
# Only one client is actually created here. Instantiating the default client
# and then immediately rebinding ``client`` would leave the first client
# unused and never closed.
client = MongoClient(host='127.0.0.1', port=27017, maxPoolSize=100)

# Select the database (the equivalent of "use db").
database = client['test']

# Select the collection -- roughly the NoSQL counterpart of a table.
collection = database['restaurants']
26+
27+
# Fetch a single document from the collection; no filter is applied.
result = collection.find_one()

# Fetch all documents from the collection; with no filter every document
# matches.
result = collection.find()
33+
34+
# Sample restaurant document for the insert example.
# Each grade entry is built from a (timestamp, grade, score) triple; the
# ISO-8601 timestamp is parsed into a datetime before being stored.
sample_document_post = {
    "address": {
        "street": "2 Avenue",
        "zipcode": "10075",
        "building": "1480",
        "coord": [-73.9557413, 40.7720266],
    },
    "borough": "Manhattan",
    "cuisine": "Italian",
    "grades": [
        {"date": dateutil.parser.parse(stamp), "grade": grade, "score": score}
        for stamp, grade, score in (
            ("2014-10-01T00:00:00Z", "A", 11),
            ("2014-01-16T00:00:00Z", "B", 17),
        )
    ],
    "name": "Vella",
    "restaurant_id": "41704620",
}

# The insert itself is left disabled; uncomment to store the sample document.
# post_id = collection.insert(sample_document_post)
# pprint.pprint(post_id)
62+
63+
# Query by a top-level field: look up the sample restaurant's ID with find().
result = collection.find({"restaurant_id": "41704620"})

# Query by a top-level field: the same filter, using find_one().
result = collection.find_one({"restaurant_id": "41704620"})

# Query by a top-level field: find documents whose borough field equals
# "Manhattan".
result = collection.find({"borough": "Manhattan"})
73+
74+
# Query by a field in an embedded document.
#
# To put a condition on a field inside an embedded document, use dot
# notation; the whole dotted field name must be quoted. The query below is an
# equality condition on the zipcode field of the address embedded document.
# Example of a document shape it applies to:
#
#     {"address": {
#         "street": "2 Avenue",
#         "zipcode": "10075",
#         "building": "1480",
#         "coord": [-73.9557413, 40.7720266]
#     }}
result = collection.find({"address.zipcode": "10075"})

# Query by a field in an array.
#
# The grades array holds embedded documents as its elements. Conditions on a
# field of those documents also use (quoted) dot notation. The query below
# finds documents whose grades array contains an embedded document with a
# grade field equal to "B". Example of a document shape it applies to:
#
#     {"grades": [
#         {
#             "date": dateutil.parser.parse("2014-10-01T00:00:00Z"),
#             "grade": "A",
#             "score": 11
#         },
#         {
#             "date": dateutil.parser.parse("2014-01-16T00:00:00Z"),
#             "grade": "B",
#             "score": 17
#         }]}
result = collection.find({"grades.grade": "B"})
109+
110+
# Greater-than operator ($gt): documents whose grades array contains an
# embedded document with a score greater than 30.
result = collection.find({"grades.score": {'$gt': 30}})

# Less-than operator ($lt): documents whose grades array contains an
# embedded document with a score less than 10.
result = collection.find({"grades.score": {'$lt': 10}})
117+
118+
# Range query with $gte / $lte: count the documents that have a grade dated
# in the year 2015.
#
# datetime(2015, 12, 31) alone is midnight at the *start* of 31 December and
# would exclude almost all of that day, so the inclusive upper bound is the
# last millisecond of the year (BSON dates have millisecond precision).
start_date = datetime.datetime(year=2015, month=1, day=1)
end_date = datetime.datetime(year=2015, month=12, day=31,
                             hour=23, minute=59, second=59, microsecond=999000)

# $elemMatch makes both bounds apply to the SAME grades entry. With plain
# "grades.date" dot notation, one entry could satisfy $gte and a different
# entry $lte, matching a restaurant graded only in 2014 and 2016.
# count_documents() (PyMongo 3.7+) replaces Cursor.count(), which was removed
# in PyMongo 4.
result = collection.count_documents(
    {'grades': {'$elemMatch': {'date': {'$gte': start_date, '$lte': end_date}}}}
)
123+
124+
# Combine conditions -- logical AND.
# Several conditions listed (comma-separated) in one filter document are
# joined by a logical conjunction.
result = collection.find({"cuisine": "Italian", "address.zipcode": "10075"})

# Combine conditions -- logical OR.
# The $or operator takes a list of filter documents and matches documents
# that satisfy at least one of them, so each alternative must be its own list
# entry. Placing both conditions inside a single entry would AND them.
result = collection.find({
    '$or': [
        {"cuisine": "Italian"},
        {"address.zipcode": "10075"},
    ]
})
136+
137+
# Sort query results.
#
# To order the result set, append sort() to the query. The operation below
# returns all documents in the restaurants collection, sorted first by the
# borough field in ascending order and then, within each borough, by the
# "address.zipcode" field in ascending order.
#
# A multi-key sort must be passed as ONE list of (field, direction) pairs:
# chaining sort() calls does not combine them, only the last call takes
# effect. Direction 1 means ascending.
result = collection.find().sort([("borough", 1), ("address.zipcode", 1)])
146+
147+
# Update top-level fields.
#
# Updates the first document with name equal to "Juni": $set changes the
# cuisine field and $currentDate stores the current date in lastModified.
#
# update_one() modifies at most one matching document, which is what the
# legacy update(..., multi=False) call did; the legacy method was removed in
# PyMongo 4.
#
# upsert (boolean): True - if no matching document is found, create a new one.
result = collection.update_one(
    {"name": "Juni"},
    {
        '$set': {"cuisine": "American (New) Vijay Anand"},
        '$currentDate': {"lastModified": True},
    },
    upsert=False,
)
161+
162+
163+
# Remove/delete ALL documents that match a condition.
result = collection.delete_many({"cuisine": "American (New) Vijay Anand"})

# Remove/delete only ONE matching document.
# The legacy remove() has no '$justOne' option: a dict passed as its second
# positional argument is merely truthy, so every matching document would be
# deleted. delete_one() removes at most a single match.
result = collection.delete_one({"borough": "Queens"})
168+
169+
# Total number of documents in the collection.
# count_documents() (PyMongo 3.7+) replaces Cursor.count(), which was removed
# in PyMongo 4; an empty filter matches every document.
count = collection.count_documents({})
pprint.pprint('Total documents - {}'.format(count))

# Number of documents matching a filter.
count = collection.count_documents({"restaurant_id": "41704620"})
pprint.pprint('Total documents found with {} - {}'.format({"restaurant_id": "41704620"}, count))
176+
177+
# Group documents by a field and calculate a count.
#
# The $group stage groups by the key given in its _id field. $group refers to
# fields by their field path: the field name prefixed with a dollar sign ($).
# Accumulators perform a calculation per group. Here the restaurants are
# grouped by the borough field and the $sum accumulator counts the documents
# in each group.
result = collection.aggregate([
    {'$group': {"_id": "$borough", "count": {'$sum': 1}}},
])

# Filter and group documents.
# The _id field contains the distinct zipcode value, i.e. the group-by key
# value.
result = collection.aggregate([
    {'$match': {"borough": "Brooklyn"}},
    {'$group': {"_id": "$address.zipcode", "count": {'$sum': 1}}},
])

# $in operator: get documents whose borough is one of the listed values.
borough = ['Missing', 'Manhattan']
result = collection.find({"borough": {'$in': borough}})
199+
200+
201+
# Overall server information: map every database name to the list of its
# collection names.
# list_database_names() / list_collection_names() (PyMongo 3.6+) replace
# database_names() / collection_names(), which were removed in PyMongo 4.
# The loop variable is deliberately not called "collection", so it cannot be
# confused with the module-level collection handle.
details = {
    db_name: client[db_name].list_collection_names()
    for db_name in client.list_database_names()
}
pprint.pprint(details)
205+
206+
207+
import pandas as pd
208+
209+
210+
def posts_2_df(iterator, chunk_size=1000):
    """
    Build a single pandas.DataFrame from an iterator of records.

    Records are buffered and converted to intermediate DataFrames
    ``chunk_size`` at a time, then the intermediate frames are concatenated.
    This is a balance between memory and efficiency.

    :param iterator: iterable of records accepted by ``pandas.DataFrame``
        (for example dicts).
    :param chunk_size: number of records converted per intermediate frame.
    :return: DataFrame holding every record, with row labels 0..n-1; an
        empty DataFrame when ``iterator`` yields nothing.
    """
    records = []
    frames = []
    for index, record in enumerate(iterator):
        records.append(record)
        # A full chunk has been buffered: convert it and start a new buffer.
        if index % chunk_size == chunk_size - 1:
            frames.append(pd.DataFrame(records))
            records = []
    # Flush the final, partially filled chunk.
    if records:
        frames.append(pd.DataFrame(records))
    # pd.concat() raises ValueError when given no frames at all, so the
    # "no records" case is handled explicitly.
    if not frames:
        return pd.DataFrame()
    # ignore_index gives one continuous row index instead of labels that
    # restart at 0 for every chunk.
    return pd.concat(frames, ignore_index=True)
225+
226+
# Load the restaurants that have a grade dated between start_date and
# end_date into a DataFrame and show its first rows.
# $elemMatch requires a single grades entry to satisfy both bounds; with
# plain "grades.date" dot notation the two bounds could be met by different
# entries of the array.
result = collection.find(
    {'grades': {'$elemMatch': {'date': {'$gte': start_date, '$lte': end_date}}}}
)
data_frame = posts_2_df(iterator=result, chunk_size=10000)
print(data_frame.head())

0 commit comments

Comments
 (0)