Skip to content

Commit 1b1819b

Browse files
authored
Move org deletion to background job with access to backend ops classes (#2098)
This PR introduces background jobs that have full access to the backend ops classes and moves the delete org job to a background job.
1 parent 84a74c4 commit 1b1819b

File tree

10 files changed

+404
-16
lines changed

10 files changed

+404
-16
lines changed

backend/btrixcloud/background_jobs.py

+72-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""k8s background jobs"""
22

33
import asyncio
4+
import os
45
from datetime import datetime
56
from typing import Optional, Tuple, Union, List, Dict, TYPE_CHECKING, cast
67
from uuid import UUID
@@ -19,6 +20,7 @@
1920
BgJobType,
2021
CreateReplicaJob,
2122
DeleteReplicaJob,
23+
DeleteOrgJob,
2224
PaginatedBackgroundJobResponse,
2325
AnyJob,
2426
StorageRef,
@@ -273,6 +275,51 @@ async def create_delete_replica_job(
273275
)
274276
return None
275277

278+
async def create_delete_org_job(
    self,
    org: Organization,
    existing_job_id: Optional[str] = None,
) -> Optional[str]:
    """Create background job to delete org and its data.

    Launches a k8s job via the crawl manager, then records (or updates) the
    corresponding DeleteOrgJob tracking document in the jobs collection.

    :param org: the organization to delete
    :param existing_job_id: if set, this is a retry — the previous attempt's
        started/finished timestamps are archived in ``previousAttempts`` and
        the job record is reset to a fresh in-progress state
    :return: the job id on success, or None if the job could not be started
    """
    try:
        job_id = await self.crawl_manager.run_delete_org_job(
            oid=str(org.id),
            # image/pull-policy come from the backend's own deployment env
            backend_image=os.environ.get("BACKEND_IMAGE", ""),
            pull_policy=os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""),
            existing_job_id=existing_job_id,
        )
        if existing_job_id:
            # retry: archive the prior attempt and reset job state
            delete_org_job = await self.get_background_job(existing_job_id, org.id)
            previous_attempt = {
                "started": delete_org_job.started,
                "finished": delete_org_job.finished,
            }
            if delete_org_job.previousAttempts:
                delete_org_job.previousAttempts.append(previous_attempt)
            else:
                delete_org_job.previousAttempts = [previous_attempt]
            delete_org_job.started = dt_now()
            delete_org_job.finished = None
            delete_org_job.success = None
        else:
            delete_org_job = DeleteOrgJob(
                id=job_id,
                oid=org.id,
                started=dt_now(),
            )

        # upsert so a retry updates the existing record in place
        await self.jobs.find_one_and_update(
            {"_id": job_id}, {"$set": delete_org_job.to_dict()}, upsert=True
        )

        return job_id
    # pylint: disable=broad-exception-caught
    except Exception as exc:
        # best-effort: failure to launch is reported, not raised
        print(f"warning: delete org job could not be started: {exc}")
        return None
322+
276323
async def job_finished(
277324
self,
278325
job_id: str,
@@ -316,10 +363,13 @@ async def job_finished(
316363
)
317364

318365
async def get_background_job(
319-
self, job_id: str, oid: UUID
320-
) -> Union[CreateReplicaJob, DeleteReplicaJob]:
366+
self, job_id: str, oid: Optional[UUID] = None
367+
) -> Union[CreateReplicaJob, DeleteReplicaJob, DeleteOrgJob]:
321368
"""Get background job"""
322-
query: dict[str, object] = {"_id": job_id, "oid": oid}
369+
query: dict[str, object] = {"_id": job_id}
370+
if oid:
371+
query["oid"] = oid
372+
323373
res = await self.jobs.find_one(query)
324374
if not res:
325375
raise HTTPException(status_code=404, detail="job_not_found")
@@ -331,9 +381,10 @@ def _get_job_by_type_from_data(self, data: dict[str, object]):
331381
if data["type"] == BgJobType.CREATE_REPLICA:
332382
return CreateReplicaJob.from_dict(data)
333383

334-
return DeleteReplicaJob.from_dict(data)
384+
if data["type"] == BgJobType.DELETE_REPLICA:
385+
return DeleteReplicaJob.from_dict(data)
335386

336-
# return BackgroundJob.from_dict(data)
387+
return DeleteOrgJob.from_dict(data)
337388

338389
async def list_background_jobs(
339390
self,
@@ -432,9 +483,8 @@ async def retry_background_job(
432483
if job.success:
433484
raise HTTPException(status_code=400, detail="job_already_succeeded")
434485

435-
file = await self.get_replica_job_file(job, org)
436-
437486
if job.type == BgJobType.CREATE_REPLICA:
487+
file = await self.get_replica_job_file(job, org)
438488
primary_storage = self.storage_ops.get_org_storage_by_ref(org, file.storage)
439489
primary_endpoint, bucket_suffix = self.strip_bucket(
440490
primary_storage.endpoint_url
@@ -452,6 +502,7 @@ async def retry_background_job(
452502
)
453503

454504
if job.type == BgJobType.DELETE_REPLICA:
505+
file = await self.get_replica_job_file(job, org)
455506
await self.create_delete_replica_job(
456507
org,
457508
file,
@@ -461,6 +512,12 @@ async def retry_background_job(
461512
existing_job_id=job_id,
462513
)
463514

515+
if job.type == BgJobType.DELETE_ORG:
516+
await self.create_delete_org_job(
517+
org,
518+
existing_job_id=job_id,
519+
)
520+
464521
return {"success": True}
465522

466523
async def retry_failed_background_jobs(
@@ -523,6 +580,14 @@ async def get_background_job(
523580
"""Retrieve information for background job"""
524581
return await ops.get_background_job(job_id, org.id)
525582

583+
@app.get("/orgs/all/jobs/{job_id}", response_model=AnyJob, tags=["jobs"])
async def get_background_job_all_orgs(job_id: str, user: User = Depends(user_dep)):
    """Get background job from any org (superuser only).

    Fix: response_model was SuccessResponse, but the handler returns a
    background job document — declare AnyJob so the response is not
    mis-validated/stripped by FastAPI.
    """
    if not user.is_superuser:
        raise HTTPException(status_code=403, detail="Not Allowed")

    # no oid filter: superuser may look up a job from any org
    return await ops.get_background_job(job_id)
590+
526591
@router.post("/{job_id}/retry", response_model=SuccessResponse)
527592
async def retry_background_job(
528593
job_id: str,

backend/btrixcloud/crawlmanager.py

+30
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
# ============================================================================
1818
DEFAULT_PROXY_ID: str = os.environ.get("DEFAULT_PROXY_ID", "")
1919

20+
DEFAULT_NAMESPACE: str = os.environ.get("DEFAULT_NAMESPACE", "default")
21+
2022

2123
# ============================================================================
2224
class CrawlManager(K8sAPI):
@@ -110,6 +112,34 @@ async def run_replica_job(
110112

111113
return job_id
112114

115+
async def run_delete_org_job(
    self,
    oid: str,
    backend_image: str,
    pull_policy: str,
    existing_job_id: Optional[str] = None,
) -> str:
    """Run k8s job to delete an org and all of its data.

    Renders the ``background_job.yaml`` template and creates the job in the
    default namespace.

    :param oid: org id (string form) passed to the job template
    :param backend_image: backend container image to run the job with
    :param pull_policy: image pull policy for the job container
    :param existing_job_id: reuse this id (retry) instead of minting a new one
    :return: the id of the created (or re-created) job
    """
    if existing_job_id:
        job_id = existing_job_id
    else:
        # random suffix keeps retries/new jobs for the same org distinct
        job_id = f"delete-org-{oid}-{secrets.token_hex(5)}"

    params = {
        "id": job_id,
        "oid": oid,
        "job_type": BgJobType.DELETE_ORG.value,
        "backend_image": backend_image,
        "pull_policy": pull_policy,
    }

    data = self.templates.env.get_template("background_job.yaml").render(params)

    await self.create_from_yaml(data, namespace=DEFAULT_NAMESPACE)

    return job_id
142+
113143
async def create_crawl_job(
114144
self,
115145
crawlconfig: CrawlConfig,

backend/btrixcloud/main.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def main() -> None:
244244

245245
init_uploads_api(*base_crawl_init)
246246

247-
org_ops.set_ops(base_crawl_ops, profiles, coll_ops)
247+
org_ops.set_ops(base_crawl_ops, profiles, coll_ops, background_job_ops)
248248

249249
user_manager.set_ops(org_ops, crawl_config_ops, base_crawl_ops)
250250

backend/btrixcloud/main_bg.py

+144
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
""" entrypoint module for background jobs """
2+
3+
import asyncio
4+
import os
5+
import sys
6+
import traceback
7+
from uuid import UUID
8+
9+
from .crawlmanager import CrawlManager
10+
from .db import init_db
11+
from .emailsender import EmailSender
12+
13+
# from .utils import register_exit_handler
14+
from .models import BgJobType
15+
16+
from .basecrawls import BaseCrawlOps
17+
from .invites import InviteOps
18+
from .users import init_user_manager
19+
from .orgs import OrgOps
20+
from .colls import CollectionOps
21+
from .crawlconfigs import CrawlConfigOps
22+
from .crawls import CrawlOps
23+
from .profiles import ProfileOps
24+
from .storages import StorageOps
25+
from .webhooks import EventWebhookOps
26+
from .background_jobs import BackgroundJobOps
27+
from .pages import PageOps
28+
29+
job_type = os.environ.get("BG_JOB_TYPE")
30+
oid = os.environ.get("OID")
31+
32+
33+
# ============================================================================
34+
# pylint: disable=too-many-function-args, duplicate-code, too-many-locals
35+
# ============================================================================
# pylint: disable=too-many-function-args, duplicate-code, too-many-locals
async def main():
    """Run a single background job to completion.

    Wires up the backend ops classes (mirroring main.py's init) so the job
    body has the same capabilities as the API server, then dispatches on the
    module-level ``job_type`` (from BG_JOB_TYPE). Returns a process exit
    code: 0 on success, 1 on any failure.
    """
    email = EmailSender()

    dbclient, mdb = init_db()

    invite_ops = InviteOps(mdb, email)

    user_manager = init_user_manager(mdb, email, invite_ops)

    org_ops = OrgOps(mdb, invite_ops, user_manager)

    event_webhook_ops = EventWebhookOps(mdb, org_ops)

    # background jobs require a cluster: CrawlManager is k8s-backed
    if not os.environ.get("KUBERNETES_SERVICE_HOST"):
        print(
            "Sorry, the Browsertrix Backend must be run inside a Kubernetes environment.\
Kubernetes not detected (KUBERNETES_SERVICE_HOST is not set), Exiting"
        )
        sys.exit(1)

    crawl_manager = CrawlManager()

    storage_ops = StorageOps(org_ops, crawl_manager)

    background_job_ops = BackgroundJobOps(
        mdb, email, user_manager, org_ops, crawl_manager, storage_ops
    )

    profile_ops = ProfileOps(
        mdb, org_ops, crawl_manager, storage_ops, background_job_ops
    )

    crawl_config_ops = CrawlConfigOps(
        dbclient,
        mdb,
        user_manager,
        org_ops,
        crawl_manager,
        profile_ops,
    )

    coll_ops = CollectionOps(mdb, crawl_manager, org_ops, event_webhook_ops)

    base_crawl_ops = BaseCrawlOps(
        mdb,
        user_manager,
        org_ops,
        crawl_config_ops,
        coll_ops,
        storage_ops,
        event_webhook_ops,
        background_job_ops,
    )

    crawl_ops = CrawlOps(
        crawl_manager,
        mdb,
        user_manager,
        org_ops,
        crawl_config_ops,
        coll_ops,
        storage_ops,
        event_webhook_ops,
        background_job_ops,
    )

    page_ops = PageOps(mdb, crawl_ops, org_ops, storage_ops)

    base_crawl_ops.set_page_ops(page_ops)
    crawl_ops.set_page_ops(page_ops)

    org_ops.set_ops(base_crawl_ops, profile_ops, coll_ops, background_job_ops)

    user_manager.set_ops(org_ops, crawl_config_ops, base_crawl_ops)

    # Fix: set_ops was previously called twice with different first args
    # (crawl_ops, then base_crawl_ops); only the last call took effect, so
    # keep just that one.
    background_job_ops.set_ops(base_crawl_ops, profile_ops)

    crawl_config_ops.set_coll_ops(coll_ops)

    # Run job
    if job_type == BgJobType.DELETE_ORG:
        if not oid:
            print("Org id missing, quitting")
            return 1
        org = await org_ops.get_org_by_id(UUID(oid))
        if not org:
            print("Org id invalid, quitting")
            return 1

        try:
            await org_ops.delete_org_and_data(org, user_manager)
            return 0
        # pylint: disable=broad-exception-caught
        except Exception:
            traceback.print_exc()
            return 1

    print(f"Provided job type {job_type} not currently supported")
    return 1
139+
140+
141+
# # ============================================================================
142+
# # ============================================================================
if __name__ == "__main__":
    # run the job entrypoint and propagate its status code to the shell
    sys.exit(asyncio.run(main()))

backend/btrixcloud/models.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -2013,6 +2013,7 @@ class BgJobType(str, Enum):
20132013

20142014
CREATE_REPLICA = "create-replica"
20152015
DELETE_REPLICA = "delete-replica"
2016+
DELETE_ORG = "delete-org"
20162017

20172018

20182019
# ============================================================================
@@ -2051,10 +2052,19 @@ class DeleteReplicaJob(BackgroundJob):
20512052
replica_storage: StorageRef
20522053

20532054

2055+
# ============================================================================
2056+
class DeleteOrgJob(BackgroundJob):
    """Model for tracking deletion of org data jobs"""

    # discriminator: fixed to "delete-org" so union parsing selects this model
    type: Literal[BgJobType.DELETE_ORG] = BgJobType.DELETE_ORG
2060+
2061+
20542062
# ============================================================================
20552063
# Union of all job types, for response model
20562064

2057-
AnyJob = RootModel[Union[CreateReplicaJob, DeleteReplicaJob, BackgroundJob]]
2065+
AnyJob = RootModel[
2066+
Union[CreateReplicaJob, DeleteReplicaJob, BackgroundJob, DeleteOrgJob]
2067+
]
20582068

20592069

20602070
# ============================================================================
@@ -2274,6 +2284,13 @@ class DeletedResponse(BaseModel):
22742284
deleted: bool
22752285

22762286

2287+
# ============================================================================
2288+
class DeletedResponseId(DeletedResponse):
    """Response for delete API endpoints that return job id"""

    # id of the background job launched to perform the deletion
    id: str
2292+
2293+
22772294
# ============================================================================
22782295
class DeletedResponseQuota(DeletedResponse):
22792296
"""Response for delete API endpoints"""

0 commit comments

Comments (0)