diff --git a/LICENSE b/LICENSE deleted file mode 100644 index a3ef75b..0000000 --- a/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2025, Scientific Python Developers -Copyright (c) 2024, Lars Grüter -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the vector package developers nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..a2438c0 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,93 @@ +Files: * +Copyright: 2025, Scientific Python Developers + 2024, Lars Grüter +License: BSD 3-Clause + +------------------------------------------------------------------------------- + +Files: src/docstub/_vendored/stdlib.py +Copyright: 2001-2025, Python Software Foundation +License: PSF-2.0 + +------------------------------------------------------------------------------- + +License: BSD-3-Clause + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the vector package developers nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +------------------------------------------------------------------------------- + +License: PSF-2.0 + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001 Python Software Foundation; All Rights Reserved" +are retained in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. diff --git a/docs/command_line.md b/docs/command_line.md index 8e7d92e..2e9d53a 100644 --- a/docs/command_line.md +++ b/docs/command_line.md @@ -43,6 +43,8 @@ Options: --config PATH Set one or more configuration file(s) explicitly. Otherwise, it will look for a `pyproject.toml` or `docstub.toml` in the current directory. + --ignore GLOB Ignore files matching this glob-style pattern. Can be + used multiple times. --group-errors Group identical errors together and list where they occurred. Will delay showing errors until all files have been processed. 
Otherwise, simply report errors as the diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..a0a9024 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,95 @@ +# Configuration reference + +Docstub will automatically look for configuration in files named + +- `pyproject.toml`, and +- `docstub.toml` + +in the current working directory. +If config files are explicitly passed to the command line interface via the `--config` option, docstub won't implicitly look for files in the current directory. +Multiple configuration files can be used, whose content will be merged. + +Out of the box, docstub makes use of an internal configuration file [`numpy_config.toml`](../src/docstub/numpy_config.toml) which provides defaults to use NumPy types. + + +## Configuration fields in `[tool.docstub]` + +All configuration must be declared inside a `[tool.docstub]` table. + + +### `ignore_files` + +- [TOML type](https://toml.io/en/latest): array of string(s) + +Ignore files and directories matching these [glob-style patterns](https://docs.python.org/3/library/glob.html#glob.translate). +Patterns that don't start with "/" are interpreted as relative to the +directory that contains the Python package for which stubs are generated. + +Example: + +```toml +[tool.docstub] +ignore_files = [ + "**/tests", +] +``` + +- Will ignore any directory anywhere that is named `tests`. + + +### `types` + +- [TOML type](https://toml.io/en/latest): table, mapping string to string + +Types and their external modules to use in docstrings. +Docstub can't yet automatically discover where types from other packages should be imported from. +Instead, you can provide this information explicitly. +Any type on the left side will be associated with the given "module" on the right side. + +Example: + +```toml +[tool.docstub.types] +Path = "pathlib" +NDArray = "numpy.typing" +``` + +- Will allow using `Path` in docstrings and will use `from pathlib import Path` to import the type. 
+- Will allow using `NDArray` in docstrings and will use `from numpy.typing import NDArray` to import the type. + + +### `type_prefixes` + +- [TOML type](https://toml.io/en/latest): table, mapping string to string + +Prefixes for external modules to match types in docstrings. +Docstub can't yet automatically discover where types from other packages should be imported from. +Instead, you can provide this information explicitly. +Any type in a docstring whose prefix matches the name given on the left side, will be associated with the given "module" on the right side. + +Example: + +```toml +[tool.docstub.type_prefixes] +np = "numpy" +plt = "matplotlib.pyplot" +``` + +- Will match `np.uint8` and `np.typing.NDArray` and use `import numpy as np`. +- Will match `plt.Figure` and use `import matplotlib.pyplot as plt`. + + +### `type_nicknames` + +- [TOML type](https://toml.io/en/latest): table, mapping string to string + +Nicknames for types that can be used in docstrings to describe valid Python types or annotations. + +Example: + +```toml +[tool.docstub.type_nicknames] +func = "Callable" +``` + +- Will map `func` to the `Callable` type from the `typing` module. 
diff --git a/pyproject.toml b/pyproject.toml index 285f903..e55eb23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ maintainers = [ description = "Generate Python stub files from docstrings" readme = "README.md" license = "BSD-3-Clause" -license-files = ["LICENSE"] +license-files = ["LICENSE.txt"] requires-python = ">=3.12" keywords = ["typing", "stub files", "docstings", "numpydoc"] classifiers = [ @@ -120,6 +120,7 @@ run.source = ["docstub"] Path = "pathlib" [tool.docstub.type_prefixes] +re = "re" cst = "libcst" lark = "lark" numpydoc = "numpydoc" diff --git a/src/docstub/_cli.py b/src/docstub/_cli.py index 33b137b..3bcbf76 100644 --- a/src/docstub/_cli.py +++ b/src/docstub/_cli.py @@ -15,13 +15,12 @@ ) from ._cache import FileCache from ._config import Config -from ._stubs import ( +from ._path_utils import ( STUB_HEADER_COMMENT, - Py2StubTransformer, - try_format_stub, walk_python_package, walk_source_and_targets, ) +from ._stubs import Py2StubTransformer, try_format_stub from ._utils import ErrorReporter, GroupedErrorReporter from ._version import __version__ @@ -43,22 +42,24 @@ def _load_configuration(config_paths=None): numpy_config = Config.from_toml(Config.NUMPY_PATH) config = config.merge(numpy_config) - pyproject_toml = Path.cwd() / "pyproject.toml" - if pyproject_toml.is_file(): - logger.info("using %s", pyproject_toml) - add_config = Config.from_toml(pyproject_toml) - config = config.merge(add_config) - - docstub_toml = Path.cwd() / "docstub.toml" - if docstub_toml.is_file(): - logger.info("using %s", docstub_toml) - add_config = Config.from_toml(docstub_toml) - config = config.merge(add_config) - - for path in config_paths: - logger.info("using %s", path) - add_config = Config.from_toml(path) - config = config.merge(add_config) + if config_paths: + for path in config_paths: + logger.info("using %s", path) + add_config = Config.from_toml(path) + config = config.merge(add_config) + + else: + pyproject_toml = Path.cwd() / 
"pyproject.toml" + if pyproject_toml.is_file(): + logger.info("using %s", pyproject_toml) + add_config = Config.from_toml(pyproject_toml) + config = config.merge(add_config) + + docstub_toml = Path.cwd() / "docstub.toml" + if docstub_toml.is_file(): + logger.info("using %s", docstub_toml) + add_config = Config.from_toml(docstub_toml) + config = config.merge(add_config) return config @@ -78,12 +79,17 @@ def _setup_logging(*, verbose): ) -def _collect_types(root_path): +def _collect_types(root_path, *, ignore=()): """Collect types. Parameters ---------- root_path : Path + ignore : Sequence[str], optional + Don't yield files matching these glob-like patterns. The pattern is + interpreted relative to the root of the Python package unless it starts + with "/". See :ref:`glob.translate(..., recursive=True, include_hidden=True)` + for more details on the precise implementation. Returns ------- @@ -98,7 +104,7 @@ def _collect_types(root_path): name=f"{__version__}/collected_types", ) if root_path.is_dir(): - for source_path in walk_python_package(root_path): + for source_path in walk_python_package(root_path, ignore=ignore): logger.info("collecting types in %s", source_path) types_in_source = collect_cached_types(source_path) types.update(types_in_source) @@ -162,6 +168,13 @@ def cli(): "Otherwise, it will look for a `pyproject.toml` or `docstub.toml` in the " "current directory.", ) +@click.option( + "--ignore", + type=str, + multiple=True, + metavar="GLOB", + help="Ignore files matching this glob-style pattern. Can be used multiple times.", +) @click.option( "--group-errors", is_flag=True, @@ -182,7 +195,7 @@ def cli(): @click.option("-v", "--verbose", count=True, help="Print more details (repeatable).") @click.help_option("-h", "--help") @report_execution_time() -def run(root_path, out_dir, config_paths, group_errors, allow_errors, verbose): +def run(root_path, out_dir, config_paths, ignore, group_errors, allow_errors, verbose): """Generate Python stub files. 
Given a `PACKAGE_PATH` to a Python package, generate stub files for it. @@ -194,7 +207,8 @@ def run(root_path, out_dir, config_paths, group_errors, allow_errors, verbose): ---------- root_path : Path out_dir : Path - config_paths : list[Path] + config_paths : Sequence[Path] + ignore : Sequence[str] group_errors : bool allow_errors : int verbose : str @@ -212,9 +226,10 @@ def run(root_path, out_dir, config_paths, group_errors, allow_errors, verbose): ) config = _load_configuration(config_paths) + config = config.merge(Config(ignore_files=list(ignore))) types = common_known_types() - types |= _collect_types(root_path) + types |= _collect_types(root_path, ignore=config.ignore_files) types |= { type_name: KnownImport(import_path=module, import_name=type_name) for type_name, module in config.types.items() @@ -245,7 +260,9 @@ def run(root_path, out_dir, config_paths, group_errors, allow_errors, verbose): # Stub generation --------------------------------------------------------- - for source_path, stub_path in walk_source_and_targets(root_path, out_dir): + for source_path, stub_path in walk_source_and_targets( + root_path, out_dir, ignore=config.ignore_files + ): if source_path.suffix.lower() == ".pyi": logger.debug("using existing stub file %s", source_path) with source_path.open() as fo: diff --git a/src/docstub/_config.py b/src/docstub/_config.py index d5df49a..401fa39 100644 --- a/src/docstub/_config.py +++ b/src/docstub/_config.py @@ -15,8 +15,9 @@ class Config: types: dict[str, str] = dataclasses.field(default_factory=dict) type_prefixes: dict[str, str] = dataclasses.field(default_factory=dict) type_nicknames: dict[str, str] = dataclasses.field(default_factory=dict) + ignore_files: list[str] = dataclasses.field(default_factory=list) - _source: tuple[Path, ...] = () + config_paths: tuple[Path, ...] 
= () @classmethod def from_toml(cls, path): @@ -33,7 +34,7 @@ def from_toml(cls, path): path = Path(path) with open(path, "rb") as fp: raw = tomllib.load(fp) - config = cls(**raw.get("tool", {}).get("docstub", {}), _source=(path,)) + config = cls(**raw.get("tool", {}).get("docstub", {}), config_paths=(path,)) logger.debug("created Config from %s", path) return config @@ -54,9 +55,10 @@ def merge(self, other): types=self.types | other.types, type_prefixes=self.type_prefixes | other.type_prefixes, type_nicknames=self.type_nicknames | other.type_nicknames, - _source=self._source + other._source, + ignore_files=self.ignore_files + other.ignore_files, + config_paths=self.config_paths + other.config_paths, ) - logger.debug("merged Config from %s", new._source) + logger.debug("merged Config from %s", new.config_paths) return new def to_dict(self): @@ -66,7 +68,7 @@ def __post_init__(self): self.validate(self.to_dict()) def __repr__(self) -> str: - sources = " | ".join(str(s) for s in self._source) + sources = " | ".join(str(s) for s in self.config_paths) formatted = f"<{type(self).__name__}: {sources}>" return formatted @@ -79,3 +81,11 @@ def validate(mapping): for key, value in table.items(): if not isinstance(key, str) or not isinstance(value, str): raise TypeError(f"`{key} = {value}` in {name} must both be a str") + + for name in ["ignore_files"]: + sequence = mapping[name] + if not isinstance(sequence, list): + raise TypeError(f"{name} must be a list") + for value in sequence: + if not isinstance(value, str): + raise TypeError(f"`{value}` in {name} must be a str") diff --git a/src/docstub/_docstrings.py b/src/docstub/_docstrings.py index c4aa13e..1591cd4 100644 --- a/src/docstub/_docstrings.py +++ b/src/docstub/_docstrings.py @@ -31,7 +31,7 @@ _lark = lark.Lark(_grammar, propagate_positions=True, strict=True) -def _find_one_token(tree: lark.Tree, *, name: str) -> lark.Token: +def _find_one_token(tree, *, name): """Find token with a specific type name in tree. 
Parameters diff --git a/src/docstub/_path_utils.py b/src/docstub/_path_utils.py new file mode 100644 index 0000000..f29a1b6 --- /dev/null +++ b/src/docstub/_path_utils.py @@ -0,0 +1,227 @@ +"""Utilities to work with Python packages on the file system.""" + +import logging +import re +import sys +from functools import lru_cache + +if sys.version_info >= (3, 13): + from glob import translate as glob_translate +else: + from ._vendored.stdlib import glob_translate + + +logger = logging.getLogger(__name__) + + +STUB_HEADER_COMMENT = "# File generated with docstub" + + +def is_docstub_generated(path): + """Check if the stub file was generated by docstub. + + Parameters + ---------- + path : Path + + Returns + ------- + is_generated : bool + """ + assert path.suffix == ".pyi" + with path.open("r") as fo: + content = fo.read() + if re.match(f"^{re.escape(STUB_HEADER_COMMENT)}", content): + return True + return False + + +def is_python_package(path): + """ + Parameters + ---------- + path : Path + + Returns + ------- + is_package : bool + + Examples + -------- + >>> from pathlib import Path + >>> is_python_package(Path(__file__)) + False + >>> is_python_package(Path(__file__).parent) + True + >>> is_python_package(Path(__file__).parent.parent) + False + """ + has_init = (path / "__init__.py").is_file() or (path / "__init__.pyi").is_file() + is_package = path.is_dir() and has_init + return is_package + + +def find_package_root(path): + """Determine the root of a Python package from any path pointing inside it. 
+ + Parameters + ---------- + path : Path + + Returns + ------- + package_root : Path + + Examples + -------- + >>> from pathlib import Path + >>> package_root = find_package_root(Path(__file__)) + >>> (package_root / "docstub").is_dir() + True + """ + root = path + if root.is_file(): + root = root.parent + + for _ in range(2**16): + if not is_python_package(root): + logger.debug("detected %s as the package root of %s", root, path) + return root + root = root.parent + + msg = f"exceeded iteration length while trying to find package root for {path}" + raise RuntimeError(msg) + + +@lru_cache(maxsize=10) +def glob_patterns_to_regex(patterns, relative_to=None): + r"""Combine glob-style patterns into a single regex. + + Parameters + ---------- + patterns : tuple of (str, ...) + relative_to : Path, optional + + Returns + ------- + regex : re.Pattern | None + + Examples + -------- + >>> from pathlib import Path + >>> glob_patterns_to_regex(("**/tests", "**/test_*.py")) # doctest: +SKIP + re.compile('(?s:(?:.+/)?tests)\\Z|(?s:(?:.+/)?test_[^/]*\\.py)\\Z') + + Use `relative_to` to make patterns absolute: + + >>> glob_patterns_to_regex(("**/tests",), relative_to=Path(__file__).parent) # doctest: +SKIP + re.compile('(?s:/.../docstub/(?:.+/)?tests)\\Z') + + A glob prefixed with "/" isn't normalized with `relative_to`: + + >>> glob_patterns_to_regex(("/**/tests",), relative_to=Path(__file__).parent) # doctest: +SKIP + re.compile('(?s:/(?:.+/)?tests)\\Z') + """ + if relative_to: + + def prefix(pattern): + return str((relative_to / pattern).resolve()) + + patterns = tuple( + pattern if pattern.startswith("/") else prefix(pattern) + for pattern in patterns + ) + + if len(patterns) == 0: + return None + translated = ( + glob_translate(pattern, recursive=True, include_hidden=True) + for pattern in patterns + ) + joined = "|".join(translated) + regex = re.compile(f"{joined}") + return regex + + +def walk_python_package(root_dir, *, ignore=()): + """Iterate source files in a 
Python package. + + Given a Python package, yield the path of contained Python modules. If an + alternate stub file already exists and isn't generated by docstub, it is + returned instead. + + Parameters + ---------- + root_dir : Path + Root directory of a Python package. + ignore : Sequence[str], optional + Don't yield files matching these glob-like patterns. The pattern is + interpreted relative to the root of the Python package unless it starts + with "/". See :ref:`glob.translate(..., recursive=True, include_hidden=True)` + for more details on the precise implementation. + + Yields + ------ + source_path : Path + Either a Python file or a stub file that takes precedence. + """ + package_root = find_package_root(root_dir) + regex = glob_patterns_to_regex(tuple(ignore), relative_to=package_root) + + if regex and regex.match(str(root_dir)): + logger.info("ignoring %s", root_dir) + return + + for path in root_dir.iterdir(): + if regex and regex.match(str(path.resolve())): + logger.info("ignoring %s", path) + continue + if path.is_dir(): + if is_python_package(path): + yield from walk_python_package(path, ignore=ignore) + else: + logger.debug("skipping directory %s which isn't a Python package", path) + continue + + assert path.is_file() + suffix = path.suffix.lower() + + if suffix == ".py": + stub = path.with_suffix(".pyi") + if stub.exists() and not is_docstub_generated(stub): + # Non-generated stub file already exists and takes precedence + yield stub + else: + yield path + + +def walk_source_and_targets(root_path, target_dir, *, ignore=()): + """Iterate modules in a Python package and its target stub files. + + Parameters + ---------- + root_path : Path + Root directory of a Python package or a single Python file. + target_dir : Path + Root directory in which a matching stub package will be created. + ignore : Sequence[str], optional + Don't yield files matching these glob-like patterns. 
The pattern is + interpreted relative to the root of the Python package unless it starts + with "/". See :ref:`glob.translate(..., recursive=True, include_hidden=True)` + for more details on the precise implementation. + + Yields + ------ + source_path : Path + Either a Python file or a stub file that takes precedence. + stub_path : Path + Target stub file. + """ + if root_path.is_file(): + stub_path = target_dir / root_path.with_suffix(".pyi").name + yield root_path, stub_path + return + + for source_path in walk_python_package(root_path, ignore=ignore): + stub_path = target_dir / source_path.with_suffix(".pyi").relative_to(root_path) + yield source_path, stub_path diff --git a/src/docstub/_stubs.py b/src/docstub/_stubs.py index 801ffac..72ecfa0 100644 --- a/src/docstub/_stubs.py +++ b/src/docstub/_stubs.py @@ -7,7 +7,6 @@ import enum import logging -import re from dataclasses import dataclass from functools import wraps from typing import ClassVar @@ -22,106 +21,6 @@ logger = logging.getLogger(__name__) -STUB_HEADER_COMMENT = "# File generated with docstub" - - -def is_python_package(path): - """ - Parameters - ---------- - path : Path - - Returns - ------- - is_package : bool - """ - is_package = (path / "__init__.py").is_file() or (path / "__init__.pyi").is_file() - return is_package - - -def is_docstub_generated(path): - """Check if the stub file was generated by docstub. - - Parameters - ---------- - path : Path - - Returns - ------- - is_generated : bool - """ - assert path.suffix == ".pyi" - with path.open("r") as fo: - content = fo.read() - if re.match(f"^{re.escape(STUB_HEADER_COMMENT)}", content): - return True - return False - - -def walk_python_package(root_dir): - """Iterate source files in a Python package. - - Given a Python package, yield the path of contained Python modules. If an - alternate stub file already exists and isn't generated by docstub, it is - returned instead. 
- - Parameters - ---------- - root_dir : Path - Root directory of a Python package. - - Yields - ------ - source_path : Path - Either a Python file or a stub file that takes precedence. - """ - for path in root_dir.iterdir(): - if path.is_dir(): - if is_python_package(path): - yield from walk_python_package(path) - else: - logger.debug("skipping directory %s which isn't a Python package", path) - continue - - assert path.is_file() - suffix = path.suffix.lower() - - if suffix == ".py": - stub = path.with_suffix(".pyi") - if stub.exists() and not is_docstub_generated(stub): - # Non-generated stub file already exists and takes precedence - yield stub - else: - yield path - - -def walk_source_and_targets(root_path, target_dir): - """Iterate modules in a Python package and its target stub files. - - Parameters - ---------- - root_path : Path - Root directory of a Python package or a single Python file. - target_dir : Path - Root directory in which a matching stub package will be created. - - Yields - ------ - source_path : Path - Either a Python file or a stub file that takes precedence. - stub_path : Path - Target stub file. 
- """ - if root_path.is_file(): - stub_path = target_dir / root_path.with_suffix(".pyi").name - yield root_path, stub_path - return - - for source_path in walk_python_package(root_path): - stub_path = target_dir / source_path.with_suffix(".pyi").relative_to(root_path) - yield source_path, stub_path - - def try_format_stub(stub: str) -> str: """Try to format a stub file with isort and black if available.""" try: diff --git a/src/docstub/_vendored/__init__.py b/src/docstub/_vendored/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/docstub/_vendored/stdlib.py b/src/docstub/_vendored/stdlib.py new file mode 100644 index 0000000..cd0659c --- /dev/null +++ b/src/docstub/_vendored/stdlib.py @@ -0,0 +1,150 @@ +# Copyright: 2001-2025, Python Software Foundation +# License: PSF-2.0 +# +# See LICENSE.txt for the full license text + +import os +import re +from collections.abc import Sequence + + +# Vendored `fnmatch._translate` from Python 3.13.4 because it isn't available in +# earlier Python versions and needed for `glob_translate` below. Copied from +# https://github.com/python/cpython/blob/8a526ec7cbea8fafc9dae4b3dd6371906b9be342/Lib/fnmatch.py#L85-L154 +def _fnmatch_translate(pat: str, STAR: str, QUESTION_MARK: str) -> str: + res = [] + add = res.append + i, n = 0, len(pat) + while i < n: + c = pat[i] + i = i + 1 + if c == "*": + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) + elif c == "?": + add(QUESTION_MARK) + elif c == "[": + j = i + if j < n and pat[j] == "!": + j = j + 1 + if j < n and pat[j] == "]": + j = j + 1 + while j < n and pat[j] != "]": + j = j + 1 + if j >= n: + add("\\[") + else: + stuff = pat[i:j] + if "-" not in stuff: + stuff = stuff.replace("\\", r"\\") + else: + chunks = [] + k = i + 2 if pat[i] == "!" 
else i + 1 + while True: + k = pat.find("-", k, j) + if k < 0: + break + chunks.append(pat[i:k]) + i = k + 1 + k = k + 3 + chunk = pat[i:j] + if chunk: + chunks.append(chunk) + else: + chunks[-1] += "-" + # Remove empty ranges -- invalid in RE. + for k in range(len(chunks) - 1, 0, -1): + if chunks[k - 1][-1] > chunks[k][0]: + chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:] + del chunks[k] + # Escape backslashes and hyphens for set difference (--). + # Hyphens that create ranges shouldn't be escaped. + stuff = "-".join( + s.replace("\\", r"\\").replace("-", r"\-") for s in chunks + ) + # Escape set operations (&&, ~~ and ||). + stuff = re.sub(r"([&~|])", r"\\\1", stuff) + i = j + 1 + if not stuff: + # Empty range: never match. + add("(?!)") + elif stuff == "!": + # Negated empty range: match any character. + add(".") + else: + if stuff[0] == "!": + stuff = "^" + stuff[1:] + elif stuff[0] in ("^", "["): + stuff = "\\" + stuff + add(f"[{stuff}]") + else: + add(re.escape(c)) + assert i == n + return res + + +# Vendored `glob.translate` from Python 3.13.4 because it isn't available in +# earlier Python versions. Copied from +# https://github.com/python/cpython/blob/8a526ec7cbea8fafc9dae4b3dd6371906b9be342/Lib/glob.py#L267-L319 + + +def glob_translate( + pat: str, + *, + recursive: bool = False, + include_hidden: bool = False, + seps: Sequence[str] | None = None, +) -> str: + """Translate a pathname with shell wildcards to a regular expression. + + If `recursive` is true, the pattern segment '**' will match any number of + path segments. + + If `include_hidden` is true, wildcards can match path segments beginning + with a dot ('.'). + + If a sequence of separator characters is given to `seps`, they will be + used to split the pattern into segments and match path separators. If not + given, os.path.sep and os.path.altsep (where available) are used. 
+ """ + if not seps: + if os.path.altsep: + seps = (os.path.sep, os.path.altsep) + else: + seps = os.path.sep + escaped_seps = "".join(map(re.escape, seps)) + any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps + not_sep = f"[^{escaped_seps}]" + if include_hidden: + one_last_segment = f"{not_sep}+" + one_segment = f"{one_last_segment}{any_sep}" + any_segments = f"(?:.+{any_sep})?" + any_last_segments = ".*" + else: + one_last_segment = f"[^{escaped_seps}.]{not_sep}*" + one_segment = f"{one_last_segment}{any_sep}" + any_segments = f"(?:{one_segment})*" + any_last_segments = f"{any_segments}(?:{one_last_segment})?" + + results = [] + parts = re.split(any_sep, pat) + last_part_idx = len(parts) - 1 + for idx, part in enumerate(parts): + if part == "*": + results.append(one_segment if idx < last_part_idx else one_last_segment) + elif recursive and part == "**": + if idx < last_part_idx: + if parts[idx + 1] != "**": + results.append(any_segments) + else: + results.append(any_last_segments) + else: + if part: + if not include_hidden and part[0] in "*?": + results.append(r"(?!\.)") + results.extend(_fnmatch_translate(part, f"{not_sep}*", not_sep)) + if idx < last_part_idx: + results.append(any_sep) + res = "".join(results) + return rf"(?s:{res})\Z" diff --git a/src/docstub/config_template.toml b/src/docstub/config_template.toml index a277ba8..192d677 100644 --- a/src/docstub/config_template.toml +++ b/src/docstub/config_template.toml @@ -1,33 +1,28 @@ [tool.docstub] + +# Ignore files and directories matching these glob-style patterns. +# Patterns that don't start with "/" are interpreted as relative to the +# directory that contains the Python package for which stubs are generated. +ignore_files = [] + + # Types and their external modules to use in docstrings. # Docstub can't yet automatically discover where to import types from other # packages from. Instead, you can provide this information explicitly. 
# Any type on the left side will be associated with the given "module" on the # right side. -# -# Examples: -# Path = "pathlib" -# Will allow using "Path" and use "from pathlib import Path". -# -# NDArray = "numpy.typing" -# Will allow "NDarray" and use "from numpy.typing import NDArray". [tool.docstub.types] + # Prefixes for external modules to match types in docstrings. # Docstub can't yet automatically discover where to import types from other # packages from. Instead, you can provide this information explicitly. # Any type in a docstring whose prefix matches the name given on the left side, # will be associated with the given "module" on the right side. -# -# Examples: -# np = "numpy" -# Will match `np.uint8` and `np.typing.NDarray` and use "import numpy as np". -# -# plt = "matplotlib.pyplot -# Will match `plt.Figure` use `import matplotlib.pyplot as plt`. [tool.docstub.type_prefixes] + # Nicknames for types that can be used in docstrings to describe valid Python # types or annotations. 
[tool.docstub.type_nicknames] diff --git a/tests/test_path_utils.py b/tests/test_path_utils.py new file mode 100644 index 0000000..7be7a2a --- /dev/null +++ b/tests/test_path_utils.py @@ -0,0 +1,30 @@ +from docstub._path_utils import walk_python_package + + +class Test_walk_python_package: + def test_ignore(self, tmp_path): + top_script = tmp_path / "script.py" + top_script.touch() + sub_package = tmp_path / "sub_package" + sub_package.mkdir() + sub_init = sub_package / "__init__.py" + sub_init.touch() + module_in_dir = sub_package / "module_in_dir.py" + module_in_dir.touch() + + paths = set(walk_python_package(tmp_path)) + assert paths == {top_script, sub_init, module_in_dir} + + paths = set(walk_python_package(tmp_path, ignore=["**/*.py"])) + assert paths == set() + + paths = set( + walk_python_package(tmp_path, ignore=["**/module*", "**/script.py"]) + ) + assert paths == {sub_init} + + paths = set(walk_python_package(tmp_path, ignore=["**/sub_package"])) + assert paths == {top_script} + + paths = set(walk_python_package(tmp_path, ignore=["**/*init*"])) + assert paths == {top_script, module_in_dir}