-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcrawler_mongo.py
50 lines (38 loc) · 1.54 KB
/
crawler_mongo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from datetime import datetime

import motor.motor_asyncio
from pymongo import ASCENDING
from pymongo.errors import DuplicateKeyError

from bencode import bdecode
from crawler import TorrentCrawler
from utils import hexlify, decode_bytes
from utils import decode_bkeys
class TorrentCrawlerMongo(TorrentCrawler):
    """Torrent crawler that stores fetched torrent metadata in MongoDB.

    Documents are written to the ``torrents`` collection of the configured
    database, one per info-hash, guarded by a unique ``info_hash`` index.
    """

    def __init__(self, db_url, db_name, **kwargs):
        """Open the MongoDB connection and make sure the index exists.

        Args:
            db_url: Connection URI handed to ``AsyncIOMotorClient``.
            db_name: Name of the database holding the ``torrents`` collection.
            **kwargs: Forwarded unchanged to ``TorrentCrawler.__init__``.
        """
        super().__init__(**kwargs)
        client = motor.motor_asyncio.AsyncIOMotorClient(db_url)
        self.db = client[db_name]
        # ``self.loop`` is not set here; presumably the base-class initialiser
        # provides it. Block until the index is in place before crawling.
        self.loop.run_until_complete(self.create_index())

    async def create_index(self):
        """Create the unique ``info_hash`` index unless it already exists."""
        index = {
            "name": "info_hash",
            "keys": [("info_hash", ASCENDING)],
            "unique": True,
        }
        coll = self.db.torrents
        # index_information() is keyed by index name.
        if index["name"] not in await coll.index_information():
            await coll.create_index(**index)

    async def enqueue_torrent(self, info_hash):
        """Queue ``info_hash`` for fetching unless it is already stored.

        Args:
            info_hash: Raw info-hash; it is hex-encoded with ``hexlify``
                before being compared with stored documents.
        """
        # Collection.count() was removed in PyMongo 4 / Motor 3;
        # count_documents() is its replacement. ``limit=1`` lets the server
        # stop at the first match, since only existence matters here.
        already_stored = await self.db.torrents.count_documents(
            {"info_hash": hexlify(info_hash)}, limit=1
        )
        if not already_stored:
            await super().enqueue_torrent(info_hash)

    async def save_torrent_metadata(self, info_hash, metadata):
        """Decode bencoded ``metadata`` and insert it as a torrent document.

        Args:
            info_hash: Raw info-hash of the torrent; stored hex-encoded.
            metadata: Bencoded torrent info dictionary.

        Raises:
            KeyError: If the decoded metadata lacks ``name`` (or ``length``
                when there is no ``files`` entry).
        """
        torrent = bdecode(metadata, decoder=decode_bkeys)
        if "files" in torrent:
            files = torrent["files"]
        else:
            # No "files" entry: describe the torrent as a one-element file
            # list so stored documents always have the same shape.
            files = [{"length": torrent["length"], "path": [torrent["name"]]}]
        item = {
            "info_hash": hexlify(info_hash),
            "files": decode_bytes(files),
            "name": decode_bytes(torrent["name"]),
            # NOTE(review): naive local time; PyMongo treats naive datetimes
            # as UTC. Left unchanged to stay consistent with existing
            # documents - confirm whether UTC was intended.
            "timestamp": datetime.now(),
        }
        try:
            await self.db.torrents.insert_one(item)
        except DuplicateKeyError:
            # The existence check in enqueue_torrent() and this insert are not
            # atomic, so the same info-hash can be fetched twice before the
            # first copy is saved. The unique index rejects the second copy;
            # the torrent is already stored, so there is nothing left to do.
            pass