Skip to content

Extract ConfigLoaders, and add Github config loader for raw user content #144

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 74 additions & 40 deletions aws_doc_sdk_examples_tools/doc_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

import yaml
import json
import requests

from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field, fields, is_dataclass, asdict
from functools import reduce
from pathlib import Path
from typing import Dict, Iterable, Optional, Set, Tuple, List, Any
from typing import Dict, Iterable, Optional, Set, Tuple, List, Any, Union

# from os import glob

Expand Down Expand Up @@ -47,6 +49,44 @@ class DocGenMergeWarning(MetadataError):
pass


class ConfigLoader(ABC):
    """Abstract source of parsed configuration files.

    Subclasses implement ``load`` to decide where a named config file
    comes from.
    """

    @abstractmethod
    def load(self, filename: str) -> Tuple[Path, Any]:
        """Load ``filename`` and return a ``(path, parsed content)`` pair.

        The base implementation does nothing and returns ``None``; concrete
        loaders must override it.
        """
        ...


class NoneLoader(ConfigLoader):
    """Loader that reads nothing and parses an empty YAML string instead."""

    def load(self, filename: str) -> Tuple[Path, Any]:
        """Return ``filename`` as a ``Path`` with the parse of an empty string.

        No file or network access happens; the second element is whatever
        ``yaml.safe_load("")`` yields.
        """
        location = Path(filename)
        empty_document = yaml.safe_load("")
        return location, empty_document


class FileLoader(ConfigLoader):
    """Loads YAML config files from a directory on the local filesystem."""

    def __init__(self, root: Optional[Path] = None):
        """Store the config directory.

        Args:
            root: Directory holding the config files. When falsy, the
                ``config`` directory next to this module is used.
        """
        if root:
            self.config = root
        else:
            self.config = Path(__file__).parent / "config"

    def load(self, filename: str) -> Tuple[Path, Any]:
        """Parse ``filename`` from the config directory.

        Returns:
            The full path that was opened and the result of
            ``yaml.safe_load`` on its UTF-8 contents.
        """
        target = self.config / filename
        with target.open(encoding="utf-8") as handle:
            parsed = yaml.safe_load(handle)
        return target, parsed


class GitHubLoader(ConfigLoader):
    """Loads YAML config files over HTTPS from raw.githubusercontent.com."""

    def __init__(
        self,
        repo: Optional[str] = None,
        commit: Optional[str] = None,
        timeout: float = 30.0,
    ):
        """Configure which repository and revision the config is read from.

        Args:
            repo: ``owner/name`` of the repository. Defaults to
                ``awsdocs/aws-doc-sdk-examples-tools`` when falsy.
            commit: Git ref to read from. Defaults to ``refs/heads/main``
                when falsy.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                stalled connection cannot block the caller forever.
        """
        self.repo = repo or "awsdocs/aws-doc-sdk-examples-tools"
        self.commit = (
            commit or "refs/heads/main"
        )  # or refs/tags/2025.07.0 or a specific SHA
        self.path = f"{self.repo}/{self.commit}/aws_doc_sdk_examples_tools/config"
        self.timeout = timeout

    def load(self, filename: str) -> Tuple[Path, Any]:
        """Fetch ``filename`` from the configured repo path and parse it as YAML.

        Returns:
            A ``Path`` built from the repo-relative location (not a local
            file) and the result of ``yaml.safe_load`` on the response text.

        Raises:
            Exception: If the response status is not 200. Errors raised by
                ``requests.get`` itself, including timeouts, propagate
                unchanged.
        """
        path = f"{self.path}/{filename}"
        url = f"https://raw.githubusercontent.com/{path}"
        # Without an explicit timeout, requests waits indefinitely on a
        # connection that never answers.
        r = requests.get(url, timeout=self.timeout)
        if r.status_code == 200:
            return Path(path), yaml.safe_load(r.text)
        raise Exception(f"Failed to request {url} ({r.status_code} {r.text})")


@dataclass
class DocGen:
root: Path
Expand Down Expand Up @@ -80,11 +120,12 @@ def collect_snippets(
self.snippets = snippets
self.errors.extend(errs)

def languages(self) -> Set[str]:
languages: Set[str] = set()
def languages(self) -> List[str]:
languages: List[str] = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the switch to List?
Although I can't find where `languages` is used, so it might not matter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I needed it in a consistent order somewhere, but now I don't remember where.

for sdk_name, sdk in self.sdks.items():
for version in sdk.versions:
languages.add(f"{sdk_name}:{version.version}")
languages.append(f"{sdk_name}:{version.version}")
languages.sort()
return languages

def expand_entities(self, text: str) -> Tuple[str, EntityErrors]:
Expand Down Expand Up @@ -170,7 +211,9 @@ def empty(cls, validation: ValidationConfig = ValidationConfig()) -> "DocGen":

@classmethod
def default(cls) -> "DocGen":
return DocGen.empty().for_root(Path(__file__).parent, incremental=True)
return DocGen.empty().for_root(
Path(__file__).parent, GitHubLoader(), incremental=True
)

def clone(self) -> "DocGen":
return DocGen(
Expand All @@ -186,13 +229,9 @@ def clone(self) -> "DocGen":
examples={},
)

def for_root(
self, root: Path, config: Optional[Path] = None, incremental=False
) -> "DocGen":
def for_root(self, root: Path, loader: ConfigLoader, incremental=False) -> "DocGen":
self.root = root

config = config or Path(__file__).parent / "config"

try:
with open(root / ".doc_gen" / "validation.yaml", encoding="utf-8") as file:
validation = yaml.safe_load(file)
Expand All @@ -203,46 +242,37 @@ def for_root(
pass

try:
sdk_path = config / "sdks.yaml"
with sdk_path.open(encoding="utf-8") as file:
meta = yaml.safe_load(file)
sdks, errs = parse_sdks(sdk_path, meta)
self.sdks = sdks
self.errors.extend(errs)
sdk_path, meta = loader.load("sdks.yaml")
sdks, errs = parse_sdks(sdk_path, meta)
self.sdks = sdks
self.errors.extend(errs)
except Exception:
pass

try:
services_path = config / "services.yaml"
with services_path.open(encoding="utf-8") as file:
meta = yaml.safe_load(file)
services, service_errors = parse_services(services_path, meta)
self.services = services
for service in self.services.values():
if service.expanded:
self.entities[service.long] = service.expanded.long
self.entities[service.short] = service.expanded.short
self.errors.extend(service_errors)
services_path, meta = loader.load("services.yaml")
services, service_errors = parse_services(services_path, meta)

self.services = services
for service in self.services.values():
if service.expanded:
self.entities[service.long] = service.expanded.long
self.entities[service.short] = service.expanded.short
self.errors.extend(service_errors)
except Exception:
pass

try:
categories_path = config / "categories.yaml"
with categories_path.open(encoding="utf-8") as file:
meta = yaml.safe_load(file)
standard_categories, categories, errs = parse_categories(
categories_path, meta
)
self.standard_categories = standard_categories
self.categories = categories
self.errors.extend(errs)
path, meta = loader.load("categories.yaml")
standard_categories, categories, errs = parse_categories(path, meta)
self.standard_categories = standard_categories
self.categories = categories
self.errors.extend(errs)
except Exception:
pass

try:
entities_config_path = config / "entities.yaml"
with entities_config_path.open(encoding="utf-8") as file:
entities_config = yaml.safe_load(file)
path, entities_config = loader.load("entities.yaml")
for entity, expanded in entities_config["expanded_override"].items():
self.entities[entity] = expanded
except Exception:
Expand Down Expand Up @@ -294,12 +324,16 @@ def process_metadata(self, path: Path) -> "DocGen":
def from_root(
cls,
root: Path,
config: Optional[Path] = None,
loader: Optional[Union[ConfigLoader, Path]] = None,
validation: ValidationConfig = ValidationConfig(),
incremental: bool = False,
) -> "DocGen":
if not loader:
loader = GitHubLoader()
if isinstance(loader, Path):
loader = FileLoader(loader)
return DocGen.empty(validation=validation).for_root(
root, config, incremental=incremental
root, loader, incremental=incremental
)

def validate(self):
Expand Down
6 changes: 3 additions & 3 deletions aws_doc_sdk_examples_tools/doc_gen_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import json

from .categories import Category, TitleInfo
from .doc_gen import DocGen, DocGenEncoder
from .doc_gen import DocGen, DocGenEncoder, FileLoader
from .metadata import Example
from .metadata_errors import MetadataErrors, MetadataError
from .sdks import Sdk, SdkVersion
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_merge(a: DocGen, b: DocGen, d: DocGen):
def test_incremental():
errors = MetadataErrors()
doc_gen = DocGen(Path(), errors).for_root(
Path(__file__).parent / "test_resources", incremental=False
Path(__file__).parent / "test_resources", loader=FileLoader(), incremental=False
)
assert len(doc_gen.examples) == 0
doc_gen.process_metadata(doc_gen.root / "awsentity_metadata.yaml")
Expand Down Expand Up @@ -231,7 +231,7 @@ def test_doc_gen_encoder(sample_doc_gen: DocGen):
def test_doc_gen_load_snippets():
errors = MetadataErrors()
doc_gen = DocGen(Path(), errors).for_root(
Path(__file__).parent / "test_resources", incremental=False
Path(__file__).parent / "test_resources", loader=FileLoader(), incremental=False
)
doc_gen.process_metadata(doc_gen.root / "valid_metadata.yaml")
doc_gen.collect_snippets()
Expand Down
1 change: 1 addition & 0 deletions aws_doc_sdk_examples_tools/sdks.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def parse(file: Path, yaml: Dict[str, Any]) -> tuple[Dict[str, Sdk], MetadataErr
from pathlib import Path

path = Path(__file__).parent.parent.parent / ".doc_gen" / "metadata" / "sdks.yaml"

with open(path) as file:
meta = yaml.safe_load(file)
examples, errors = parse(path, meta)
Expand Down
6 changes: 4 additions & 2 deletions aws_doc_sdk_examples_tools/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
from pathlib import Path
from pprint import pformat

from .doc_gen import DocGen
from .doc_gen import DocGen, FileLoader


def main(roots: List[str]):
base = DocGen.empty()
for root in roots:
docgen_root = Path(root)
doc_gen = base.clone().for_root(docgen_root)
doc_gen = base.clone().for_root(
docgen_root, loader=FileLoader(Path(root) / ".doc_gen" / "config")
)
doc_gen.collect_snippets()
print(f"Root {docgen_root.name}")
stats = doc_gen.stats()
Expand Down
15 changes: 13 additions & 2 deletions aws_doc_sdk_examples_tools/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from sys import exit

from .doc_gen import DocGen
from .doc_gen import DocGen, FileLoader
from .project_validator import check_files, verify_sample_files, ValidationConfig


Expand All @@ -32,11 +32,22 @@ def main():
"must have them.",
required=False,
)
parser.add_argument(
"--local_loader",
action="store_true",
default=False,
help="Use LocalConfigLoader, instead of GithubConfigLoader",
required=False,
)
args = parser.parse_args()
root_path = Path(args.root).resolve()

loader = FileLoader() if args.local_loader else None

doc_gen = DocGen.from_root(
root=root_path, validation=ValidationConfig(strict_titles=args.strict_titles)
root=root_path,
loader=loader,
validation=ValidationConfig(strict_titles=args.strict_titles),
)
doc_gen.collect_snippets(snippets_root=root_path)
doc_gen.validate()
Expand Down
Loading