|
3 | 3 | """
|
4 | 4 | import asyncio
|
5 | 5 | import itertools
|
6 |
| -from typing import Dict, FrozenSet |
| 6 | +from typing import Dict, FrozenSet, Set, Tuple |
7 | 7 | from typing import cast
|
8 | 8 |
|
9 | 9 | import typer
|
| 10 | +from aleph_message.models import ItemHash |
10 | 11 | from configmanager import Config
|
11 | 12 |
|
12 | 13 | import aleph.model
|
| 14 | +import aleph.services.p2p.singleton as singleton |
| 15 | +from aleph import config as aleph_config |
13 | 16 | from aleph.ccn_cli.cli_config import CliConfig
|
14 | 17 | from aleph.config import get_defaults
|
| 18 | +from aleph.exceptions import ContentCurrentlyUnavailable |
15 | 19 | from aleph.model import init_db_globals
|
| 20 | +from aleph.services.p2p import http |
| 21 | +from aleph.storage import get_hash_content |
16 | 22 | from .toolkit.local_storage import list_expected_local_files
|
17 | 23 |
|
18 | 24 | repair_ns = typer.Typer()
|
19 | 25 |
|
20 | 26 |
|
21 |
| - |
22 |
| -def print_files_to_preserve(files_to_preserve: Dict[str, FrozenSet[str]]) -> None: |
23 |
| - typer.echo("The following files will be preserved:") |
24 |
| - for file_type, files in files_to_preserve.items(): |
25 |
| - typer.echo(f"* {len(files)} {file_type}") |
| 27 | +async def init_api_servers(): |
| 28 | + peers = [peer async for peer in aleph.model.db["peers"].find({"type": "HTTP"})] |
| 29 | + singleton.api_servers = [peer["address"] for peer in peers] |
26 | 30 |
|
27 | 31 |
|
28 | 32 | async def list_missing_files() -> FrozenSet[str]:
|
29 |
| - # Get a set of all the files currently in GridFS |
30 |
| - gridfs_files_dict = { |
31 |
| - file["filename"]: file |
32 |
| - async for file in aleph.model.db["fs.files"].find( |
33 |
| - projection={"_id": 0, "filename": 1, "length": 1, "uploadDate": 1}, |
34 |
| - batch_size=1000, |
35 |
| - ) |
36 |
| - } |
| 33 | + if aleph.model.db is None: # for mypy |
| 34 | + raise ValueError("DB not initialized as expected.") |
37 | 35 |
|
38 |
| - gridfs_files = frozenset(gridfs_files_dict.keys()) |
39 |
| - typer.echo(f"Found {len(gridfs_files_dict)} files in local storage.") |
| 36 | + # Get a set of all the files currently in GridFS |
| 37 | + gridfs_files = frozenset( |
| 38 | + [ |
| 39 | + file["filename"] |
| 40 | + async for file in aleph.model.db["fs.files"].find( |
| 41 | + projection={"_id": 0, "filename": 1}, |
| 42 | + batch_size=1000, |
| 43 | + ) |
| 44 | + ] |
| 45 | + ) |
| 46 | + |
| 47 | + typer.echo(f"Found {len(gridfs_files)} files in local storage.") |
40 | 48 |
|
41 | 49 | expected_local_files_dict = await list_expected_local_files()
|
42 |
| - expected_local_files = frozenset(itertools.chain.from_iterable(expected_local_files_dict.values())) |
| 50 | + expected_local_files = frozenset( |
| 51 | + itertools.chain.from_iterable(expected_local_files_dict.values()) |
| 52 | + ) |
43 | 53 |
|
44 | 54 | missing_files = expected_local_files - gridfs_files
|
45 | 55 | return missing_files
|
46 | 56 |
|
47 | 57 |
|
| 58 | +async def fetch_and_store_file(filename: str): |
| 59 | + item_hash = ItemHash(filename) |
| 60 | + _ = await get_hash_content( |
| 61 | + content_hash=filename, |
| 62 | + engine=item_hash.item_type, |
| 63 | + use_network=True, |
| 64 | + use_ipfs=True, |
| 65 | + store_value=True, |
| 66 | + timeout=15, |
| 67 | + ) |
| 68 | + |
| 69 | + |
| 70 | +def process_results( |
| 71 | + finished_tasks: Set[asyncio.Task], task_dict: Dict[asyncio.Task, str] |
| 72 | +) -> Tuple[Set[str], Set[str]]: |
| 73 | + fetched_files = set() |
| 74 | + failed_files = set() |
| 75 | + |
| 76 | + for task in finished_tasks: |
| 77 | + filename = task_dict.pop(task) |
| 78 | + exception = task.exception() |
| 79 | + |
| 80 | + if exception is None: |
| 81 | + fetched_files.add(filename) |
| 82 | + |
| 83 | + else: |
| 84 | + failed_files.add(filename) |
| 85 | + if isinstance(exception, ContentCurrentlyUnavailable): |
| 86 | + typer.echo( |
| 87 | + f"WARNING: Could not fetch {filename}: currently unavailable." |
| 88 | + ) |
| 89 | + else: |
| 90 | + typer.echo( |
| 91 | + f"ERROR: Could not fetch {filename}: unexpected error: {exception}" |
| 92 | + ) |
| 93 | + |
| 94 | + return fetched_files, failed_files |
| 95 | + |
| 96 | + |
| 97 | +async def fetch_files(missing_files: FrozenSet[str], batch_size: int): |
| 98 | + tasks = set() |
| 99 | + task_dict = {} |
| 100 | + |
| 101 | + fetched_files = set() |
| 102 | + failed_files = set() |
| 103 | + |
| 104 | + for i, filename in enumerate(missing_files, start=1): |
| 105 | + typer.echo(f"Fetching {filename} ({i}/{len(missing_files)})...") |
| 106 | + fetch_task = asyncio.create_task(fetch_and_store_file(filename)) |
| 107 | + tasks.add(fetch_task) |
| 108 | + task_dict[fetch_task] = filename |
| 109 | + |
| 110 | + if len(tasks) == batch_size: |
| 111 | + done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) |
| 112 | + fetched, failed = process_results(done, task_dict) |
| 113 | + fetched_files |= fetched |
| 114 | + failed_files |= failed |
| 115 | + |
| 116 | + # Finish |
| 117 | + if tasks: |
| 118 | + done, _ = await asyncio.wait(tasks, return_when=asyncio.ALL_COMPLETED) |
| 119 | + fetched, failed = process_results(done, task_dict) |
| 120 | + fetched_files |= fetched |
| 121 | + failed_files |= failed |
| 122 | + |
| 123 | + typer.echo(f"Successfully fetched {len(fetched_files)} files.") |
| 124 | + if failed_files: |
| 125 | + typer.echo(f"WARNING: Failed to fetch {len(failed_files)} files.") |
| 126 | + |
| 127 | + |
48 | 128 | async def fetch_missing_files():
|
49 | 129 | missing_files = await list_missing_files()
|
50 | 130 | typer.echo(f"Found {len(missing_files)} missing files.")
|
51 | 131 |
|
| 132 | + await fetch_files(missing_files, 2000) |
| 133 | + |
52 | 134 |
|
53 | 135 | async def run(ctx: typer.Context):
|
54 | 136 | config = Config(schema=get_defaults())
|
55 | 137 | cli_config = cast(CliConfig, ctx.obj)
|
56 | 138 | config.yaml.load(str(cli_config.config_file_path))
|
57 | 139 |
|
| 140 | + # Set the config global variable, otherwise the IPFS client will not be initialized properly |
| 141 | + aleph_config.app_config = config |
| 142 | + |
58 | 143 | init_db_globals(config=config)
|
| 144 | + # To be able to fetch data from the network |
| 145 | + await init_api_servers() |
59 | 146 | if aleph.model.db is None: # for mypy
|
60 | 147 | raise ValueError("DB not initialized as expected.")
|
61 | 148 |
|
62 | 149 | await fetch_missing_files()
|
63 | 150 |
|
| 151 | + # Clean up aiohttp client sessions to avoid a warning |
| 152 | + for client_session in http.SESSIONS.values(): |
| 153 | + await client_session.close() |
| 154 | + |
64 | 155 | typer.echo("Done.")
|
65 | 156 |
|
66 | 157 |
|
|
0 commit comments