Skip to content

Commit a67430e

Browse files
feat(data): persist data in git repo (#401)
1 parent 77db212 commit a67430e

File tree

12 files changed

+824
-207
lines changed

12 files changed

+824
-207
lines changed

README.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ platforms such as GitHub discussions/issues might be added in the future.
4242
| DAILY_TASKS | False | `true` | Daily tasks on or off. |
4343
| DAILY_RELEASES | False | `true` | Send a message for each game released on this day in history. |
4444
| DAILY_CHANNEL_ID | False | `None` | Required if daily_tasks is enabled. |
45-
| DAILY_TASKS_UTC_HOUR | False | `12` | The hour to run daily tasks. |
45+
| DAILY_TASKS_UTC_HOUR | False | `12` | The hour to run daily tasks. |
46+
| DATA_REPO | False | `https://github.com/LizardByte/support-bot-data` | Repository to store persistent data. This repository should be private! |
47+
| DATA_REPO_BRANCH | False | `master` | Branch to store persistent data. |
4648
| DISCORD_BOT_TOKEN | True | `None` | Token from Bot page on discord developer portal. |
4749
| DISCORD_CLIENT_ID | True | `None` | Discord OAuth2 client id. |
4850
| DISCORD_CLIENT_SECRET | True | `None` | Discord OAuth2 client secret. |
@@ -58,11 +60,11 @@ platforms such as GitHub discussions/issues might be added in the future.
5860
| GRAVATAR_EMAIL | False | `None` | Gravatar email address for bot avatar. |
5961
| IGDB_CLIENT_ID | False | `None` | Required if daily_releases is enabled. |
6062
| IGDB_CLIENT_SECRET | False | `None` | Required if daily_releases is enabled. |
61-
| PRAW_CLIENT_ID | True | None | `client_id` from reddit app setup page. |
62-
| PRAW_CLIENT_SECRET | True | None | `client_secret` from reddit app setup page. |
63-
| PRAW_SUBREDDIT | True | None | Subreddit to monitor (reddit user should be moderator of the subreddit) |
64-
| REDDIT_USERNAME | True | None | Reddit username |
65-
| REDDIT_PASSWORD | True | None | Reddit password |
63+
| PRAW_CLIENT_ID | True | `None` | `client_id` from reddit app setup page. |
64+
| PRAW_CLIENT_SECRET | True | `None` | `client_secret` from reddit app setup page. |
65+
| PRAW_SUBREDDIT | True | `None` | Subreddit to monitor (the reddit user should be a moderator of the subreddit). |
66+
| REDDIT_USERNAME | True | `None` | Reddit username |
67+
| REDDIT_PASSWORD | True | `None` | Reddit password |
6668
| SUPPORT_COMMANDS_REPO | False | `https://github.com/LizardByte/support-bot-commands` | Repository for support commands. |
6769
| SUPPORT_COMMANDS_BRANCH | False | `master` | Branch for support commands. |
6870

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ py-cord==2.6.1
99
python-dotenv==1.1.0
1010
requests==2.32.3
1111
requests-oauthlib==2.0.0
12+
tinydb==4.8.2

src/common/database.py

Lines changed: 255 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,271 @@
11
# standard imports
2+
import os
3+
from pathlib import Path
24
import shelve
35
import threading
6+
import traceback
7+
from typing import Union
8+
9+
# lib imports
10+
import git
11+
from tinydb import TinyDB
12+
from tinydb.storages import JSONStorage
13+
from tinydb.middlewares import CachingMiddleware
14+
15+
# local imports
16+
from src.common.common import data_dir
17+
18+
# Constants
19+
DATA_REPO_LOCK = threading.Lock()
420

521

622
class Database:
    """
    TinyDB-backed JSON database, optionally persisted in a git repository.

    When git persistence is enabled, the JSON file lives inside a local clone of the
    repository named by the ``DATA_REPO`` environment variable (branch ``DATA_REPO_BRANCH``),
    and :meth:`sync` commits and pushes every ``*.json`` file found in that clone.

    The instance is a context manager: ``with Database(...) as db`` acquires the
    per-instance lock, yields the underlying :class:`tinydb.TinyDB` object, and on exit
    calls :meth:`sync` before releasing the lock.

    Parameters
    ----------
    db_name : str
        Base name of the database; the JSON file is ``<db_name>.json``.
    db_dir : Union[str, Path]
        Directory holding the data. With git enabled, the clone is placed in the
        ``support-bot-data`` sub-directory of this directory.
    use_git : bool
        Request git persistence. It is forced off when the ``GITHUB_PYTEST`` environment
        variable equals ``true`` (case-insensitive).
    """

    def __init__(self, db_name: str, db_dir: Union[str, Path] = data_dir, use_git: bool = True):
        self.db_name = db_name
        self.db_dir = db_dir

        # Check for CI environment
        is_ci = os.environ.get('GITHUB_PYTEST', '').lower() == 'true'
        self.use_git = use_git and not is_ci

        # Always define ``repo`` so later attribute access is safe even when git is disabled.
        self.repo = None
        self.repo_url = None
        self.repo_branch = None
        if self.use_git:
            self.repo_url = os.getenv("DATA_REPO", "https://github.com/LizardByte/support-bot-data")
            self.repo_branch = os.getenv("DATA_REPO_BRANCH", "master")
            self.db_dir = os.path.join(self.db_dir, "support-bot-data")

            if not os.path.exists(self.db_dir):
                self._clone_repo()
            else:
                self._open_existing_repo()
        else:
            # Without git nothing else creates the directory; make sure the JSON file can be created.
            os.makedirs(self.db_dir, exist_ok=True)

        self.json_path = os.path.join(self.db_dir, f"{self.db_name}.json")
        # The legacy shelve file lives in the ORIGINAL ``db_dir`` argument, not in the git clone.
        self.shelve_path = os.path.join(db_dir, self.db_name)  # Shelve adds its own extensions
        self.lock = threading.Lock()

        # Check if migration is needed before creating TinyDB instance
        self._check_for_migration()

        # Initialize the TinyDB instance with CachingMiddleware
        self.tinydb = TinyDB(
            self.json_path,
            storage=CachingMiddleware(JSONStorage),
            indent=4,
        )

    def _clone_repo(self):
        """
        Clone the data repository into ``self.db_dir`` on ``self.repo_branch``.

        If the clone fails because the branch does not exist on the remote, clone the
        default branch instead, create an empty orphan branch with a single ``.gitkeep``
        commit, and try to push it. A failed push of the new branch is reported but not
        fatal. Any other clone error is re-raised.
        """
        import shutil

        print(f"Cloning repository {self.repo_url} to {self.db_dir}")
        try:
            # Try cloning with the specified branch
            self.repo = git.Repo.clone_from(self.repo_url, self.db_dir, branch=self.repo_branch)
            return
        except git.exc.GitCommandError as e:
            message = str(e)
            # Only the "branch missing on remote" failure is recoverable here
            if not ("Remote branch" in message and "not found in upstream origin" in message):
                raise

        print(f"Branch '{self.repo_branch}' not found in remote. Creating a new empty branch.")
        # Clone with default branch first
        self.repo = git.Repo.clone_from(self.repo_url, self.db_dir)
        self._start_orphan_branch()

        # Remove all files in the directory except .git
        for item in os.listdir(self.db_dir):
            if item == '.git':
                continue
            item_path = os.path.join(self.db_dir, item)
            if os.path.isdir(item_path):
                shutil.rmtree(item_path)
            else:
                os.remove(item_path)

        self._commit_gitkeep()

        # Push the new branch to remote
        try:
            self.repo.git.push('--set-upstream', 'origin', self.repo_branch)
            print(f"Created and pushed new empty branch '{self.repo_branch}'")
        except git.exc.GitCommandError as e:
            # Continue anyway - we might not have push permissions
            print(f"Failed to push new branch: {str(e)}")

    def _open_existing_repo(self):
        """
        Open the repository already present at ``self.db_dir`` and check out ``self.repo_branch``.

        If the branch name is not among the repository's refs, fetch ``origin`` and either
        check out the remote branch or create, commit and push a new orphan branch. Any git
        command failure in that process is reported and the currently checked-out branch is
        kept.
        """
        self.repo = git.Repo(self.db_dir)

        # NOTE: only the last path component of each ref is compared, as in the original check.
        known_names = [ref.name.split('/')[-1] for ref in self.repo.refs]
        if self.repo_branch in known_names:
            # Branch is already known, make sure it's checked out
            self.repo.git.checkout(self.repo_branch)
            return

        try:
            self.repo.git.fetch('origin')
            remote_branches = [ref.name.split('/')[-1] for ref in self.repo.remote().refs]

            if self.repo_branch in remote_branches:
                # Checkout existing remote branch
                self.repo.git.checkout(self.repo_branch)
            else:
                # Create new orphan branch. Working-tree files are left in place here,
                # unlike the fresh-clone path, so existing data files are not deleted.
                self._start_orphan_branch()
                self._commit_gitkeep()
                self.repo.git.push('--set-upstream', 'origin', self.repo_branch)
                print(f"Created and pushed new empty branch '{self.repo_branch}'")
        except git.exc.GitCommandError:
            print(f"Failed to work with branch '{self.repo_branch}'. Using current branch instead.")

    def _start_orphan_branch(self):
        """Check out ``self.repo_branch`` as an orphan branch and clear the index."""
        # Create a new orphan branch (not based on any other branch)
        self.repo.git.checkout('--orphan', self.repo_branch)
        try:
            self.repo.git.rm('-rf', '.', '--cached')
        except git.exc.GitCommandError:
            # This might fail if there are no files yet, which is fine
            pass

    def _commit_gitkeep(self):
        """Create an empty ``.gitkeep`` in ``self.db_dir`` and commit it so the branch has a first commit."""
        gitkeep_path = os.path.join(self.db_dir, '.gitkeep')
        with open(gitkeep_path, 'w'):
            pass

        self.repo.git.add(gitkeep_path)
        self.repo.git.commit('-m', f"Initialize empty branch '{self.repo_branch}'")

    def _check_for_migration(self):
        """Run the shelve -> TinyDB migration when a shelve file exists but no usable JSON file does."""
        # No extension is used on Linux
        shelve_exists = os.path.exists(f"{self.shelve_path}.dat") or os.path.exists(self.shelve_path)
        # A zero-byte JSON file holds no data (e.g. it was only created, never written), so
        # it must not block a migration attempt.
        json_exists = os.path.exists(self.json_path) and os.path.getsize(self.json_path) > 0

        if shelve_exists and not json_exists:
            print(f"Migrating database from shelve to TinyDB: {self.shelve_path}")
            self._migrate_from_shelve()

    @staticmethod
    def _simplify_reddit_record(record_id, record_data: dict) -> dict:
        """
        Reduce a legacy Reddit record to the fields stored in TinyDB.

        A record containing a ``body`` key is treated as a comment; anything else is
        treated as a submission.
        """
        if 'body' in record_data:
            # For comments
            return {
                'reddit_id': record_data.get('id', record_id),
                'author': record_data.get('author'),
                'body': record_data.get('body'),
                'created_utc': record_data.get('created_utc', 0),
                'processed': record_data.get('processed', False),
                'slash_command': record_data.get('slash_command', {
                    'project': None,
                    'command': None,
                }),
            }

        # For submissions
        return {
            'reddit_id': record_data.get('id', record_id),
            'title': record_data.get('title'),
            'selftext': record_data.get('selftext'),
            'author': str(record_data.get('author')),
            'created_utc': record_data.get('created_utc', 0),
            'permalink': record_data.get('permalink'),
            'url': record_data.get('url'),
            'link_flair_text': record_data.get('link_flair_text'),
            'link_flair_background_color': record_data.get('link_flair_background_color'),
            'bot_discord': record_data.get('bot_discord', {
                'sent': False,
                'sent_utc': None,
            }),
        }

    def _migrate_from_shelve(self):
        """
        Copy the legacy shelve database into a TinyDB JSON file.

        Each shelve key whose value is a dict with only string keys becomes a TinyDB table;
        each dict-valued entry of it becomes one record. Other values are skipped and
        counted. The data is written to a temporary file that replaces ``self.json_path``
        only after the whole migration succeeded, so a failure never leaves a partially
        migrated database behind. Failures are reported, not raised.
        """
        tmp_path = f"{self.json_path}.migrating"
        migration_db = None
        try:
            # Drop leftovers of an earlier interrupted attempt so records are not duplicated
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

            # Create a temporary database just for migration
            migration_db = TinyDB(
                tmp_path,
                storage=CachingMiddleware(JSONStorage),
                indent=4,
            )

            # Determine if this is the Reddit database
            is_reddit_db = "reddit_bot" in self.db_name
            skipped = 0

            # Open the legacy database read-only; migration must not create or modify it.
            # NOTE(review): shelve unpickles its content - only use with trusted local files.
            with shelve.open(self.shelve_path, flag='r') as shelve_db:
                for key in shelve_db.keys():
                    value = shelve_db[key]

                    # Only dicts keyed by strings look like a collection of records
                    if not (isinstance(value, dict) and all(isinstance(k, str) for k in value)):
                        skipped += 1
                        continue

                    table = migration_db.table(key)
                    for record_id, record_data in value.items():
                        if not isinstance(record_data, dict):
                            skipped += 1
                            continue

                        if is_reddit_db:
                            table.insert(self._simplify_reddit_record(record_id, record_data))
                        else:
                            # Non-Reddit databases keep original structure
                            record_data['id'] = record_id
                            table.insert(record_data)

            # Flush changes to disk
            migration_db.storage.flush()
            migration_db.close()
            migration_db = None

            # Publish the finished file in one step
            os.replace(tmp_path, self.json_path)

            if skipped:
                print(f"Migration skipped {skipped} entries that were not dict records")
            print(f"Migration completed successfully: {self.json_path}")
        except Exception as e:
            print(f"Migration failed: {str(e)}")
            traceback.print_exc()

            # Best-effort cleanup: release the file handle, then discard the partial file.
            if migration_db is not None:
                try:
                    migration_db.close()
                except Exception as close_error:
                    print(f"Failed to close migration database: {str(close_error)}")
            try:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except OSError as cleanup_error:
                print(f"Failed to remove temporary migration file: {str(cleanup_error)}")

    def __enter__(self):
        """Acquire the instance lock and return the underlying TinyDB object."""
        self.lock.acquire()
        return self.tinydb

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Persist pending changes, then release the instance lock even if persisting fails."""
        try:
            self.sync()
        finally:
            # Without this, an error while flushing would leave the lock held forever.
            self.lock.release()

    def sync(self):
        """
        Flush cached writes to the JSON file and, with git enabled, commit and push them.

        All ``*.json`` files in the data directory are staged. A commit is made only when
        something is actually staged. Git failures are reported, not raised.
        """
        # Only call flush if using CachingMiddleware
        if hasattr(self.tinydb.storage, 'flush'):
            self.tinydb.storage.flush()

        repo = getattr(self, 'repo', None)
        if not self.use_git or repo is None:
            return

        # Git operations - commit and push changes if using git
        with DATA_REPO_LOCK:
            try:
                # Cheap early exit: nothing changed and nothing untracked
                if not repo.git.status('--porcelain'):
                    return

                # Add ALL json files in the directory to ensure we track all databases
                for file_name in os.listdir(self.db_dir):
                    if file_name.endswith('.json'):
                        repo.git.add(os.path.join(self.db_dir, file_name))

                # Only staged changes can be committed; unrelated untracked files would
                # otherwise make "git commit" fail with "nothing to commit".
                if not repo.git.diff('--cached', '--name-only'):
                    return

                # Commit all changes at once with a general message
                repo.git.commit('-m', "Update database files")
                print("Committed changes to git data repository")

                self._push_to_origin(repo)
            except Exception as e:
                print(f"Git operation failed: {str(e)}")
                traceback.print_exc()

    @staticmethod
    def _push_to_origin(repo):
        """Push to the ``origin`` remote and report the outcome based on the returned push results."""
        try:
            results = repo.remote('origin').push()
        except git.exc.GitCommandError as e:
            print(f"Failed to push changes: {str(e)}")
            return

        # GitPython reports rejected refs through flags on the returned PushInfo objects,
        # and an empty result when the push failed completely, rather than by raising.
        rejected = [info for info in results if info.flags & info.ERROR]
        if rejected or not results:
            details = "; ".join(str(info.summary).strip() for info in rejected) or "no refs were pushed"
            print(f"Failed to push changes: {details}")
        else:
            print("Pushed changes to remote git data repository")

0 commit comments

Comments
 (0)