diff --git a/CHANGELOG.md b/CHANGELOG.md index 155bc2ee..25782a1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,13 @@ # Unreleased: pdoc next + - **Add search functionality.** + pdoc now has a search bar which allows users to quickly + find relevant parts in the documentation. + See https://pdoc.dev/docs/pdoc/search.html for details. + - Redesign module list (index.html.jinja2). - Update Bootstrap to v5.0.0. + - Do not fail if `inspect.getdoc()` raises. # 2021-04-30: pdoc 6.6.0 diff --git a/pdoc/__init__.py b/pdoc/__init__.py index 71616e70..6c5a761b 100644 --- a/pdoc/__init__.py +++ b/pdoc/__init__.py @@ -381,12 +381,13 @@ def write(mod: doc.Module): def write(mod: doc.Module): retval.write(r(mod)) - all_modules = extract.parse_specs(modules) + module_names = extract.parse_specs(modules) + all_modules: dict[str, doc.Module] = {} if format == "html": def r(mod: doc.Module) -> str: - return render.html_module(module=mod, all_modules=all_modules) + return render.html_module(module=mod, all_modules=module_names) elif format == "markdown": # pragma: no cover raise NotImplementedError( @@ -397,7 +398,7 @@ def r(mod: doc.Module) -> str: else: raise ValueError(f"Invalid rendering format {format!r}.") - for mod in all_modules: + for mod in module_names: try: m = extract.load_module(mod) except RuntimeError: @@ -405,7 +406,8 @@ def r(mod: doc.Module) -> str: f"Error importing {mod}:\n{traceback.format_exc()}", RuntimeWarning ) else: - write(doc.Module(m)) + all_modules[mod] = doc.Module(m) + write(all_modules[mod]) if not output_directory: return retval.getvalue() @@ -417,4 +419,8 @@ def r(mod: doc.Module) -> str: if index: (output_directory / "index.html").write_bytes(index.encode()) + search = render.search_index(all_modules) + if search: + (output_directory / "search.json").write_bytes(search.encode()) + return retval.getvalue() diff --git a/pdoc/doc.py b/pdoc/doc.py index 32d4b20a..56a80c6a 100644 --- a/pdoc/doc.py +++ b/pdoc/doc.py @@ -131,8 +131,7 @@ def 
docstring(self) -> str: If no docstring can be found, an empty string is returned. """ - doc = inspect.getdoc(self.obj) or "" - return doc.strip() + return _safe_getdoc(self.obj) @cached_property def source(self) -> str: @@ -744,8 +743,7 @@ def docstring(self) -> str: cls = sys.modules.get(_safe_getattr(self.obj, "__module__", None), None) for name in _safe_getattr(self.obj, "__qualname__", "").split(".")[:-1]: cls = _safe_getattr(cls, name, None) - doc = inspect.getdoc(_safe_getattr(cls, self.name, None)) or "" - doc = doc.strip() + doc = _safe_getdoc(_safe_getattr(cls, self.name, None)) if doc == object.__init__.__doc__: # inspect.getdoc(Foo.__init__) returns the docstring, for object.__init__ if left undefined... @@ -1040,3 +1038,17 @@ def _safe_getattr(obj, attr, default): RuntimeWarning, ) return default + + +def _safe_getdoc(obj: Any) -> str: + """Like `inspect.getdoc()`, but never raises. Always returns a stripped string.""" + try: + doc = inspect.getdoc(obj) or "" + except Exception as e: + warnings.warn( + f"inspect.getdoc({obj!r}) raised an exception: {e}", + RuntimeWarning, + ) + return "" + else: + return doc.strip() diff --git a/pdoc/render.py b/pdoc/render.py index 3fdc727c..53fdf50c 100644 --- a/pdoc/render.py +++ b/pdoc/render.py @@ -1,12 +1,15 @@ from __future__ import annotations import os +import types from pathlib import Path from typing import Collection, Mapping, Optional +import jinja2 from jinja2 import Environment, FileSystemLoader import pdoc.doc +import pdoc.docstrings from pdoc._compat import Literal from pdoc.render_helpers import ( DefaultMacroExtension, @@ -16,8 +19,9 @@ link, linkify, minify_css, - render_docstring, + render_docstring_with_context, ) +from pdoc.search import make_index, precompile_index def configure( @@ -67,9 +71,7 @@ def html_module( This is only passed by `pdoc.web`. 
""" with defuse_unsafe_reprs(): - return env.select_template( - ["module.html.jinja2", "default/module.html.jinja2"] - ).render( + return env.get_template("module.html.jinja2").render( module=module, all_modules=all_modules, mtime=mtime, @@ -81,24 +83,42 @@ def html_module( def html_index(all_modules: Collection[str]) -> str: """Renders the module index.""" - return env.select_template( - ["index.html.jinja2", "default/index.html.jinja2"] - ).render( + return env.get_template("index.html.jinja2").render( all_modules=[m for m in all_modules if "._" not in m], ) def html_error(error: str, details: str = "") -> str: """Renders an error message.""" - return env.select_template( - ["index.html.jinja2", "default/error.html.jinja2"] - ).render( + return env.get_template("error.html.jinja2").render( error=error, details=details, ) -@defuse_unsafe_reprs() +def search_index(all_modules: dict[str, pdoc.doc.Module]) -> str: + """Renders the Elasticlunr.js search index.""" + # This is a rather terrible hack to determine if a given object is public and should be included in the index. 
+ module_template: jinja2.Template = env.get_template("module.html.jinja2") + ctx: jinja2.runtime.Context = module_template.new_context( + {"module": pdoc.doc.Module(types.ModuleType("")), "all_modules": {}} + ) + for _ in module_template.root_render_func(ctx): # type: ignore + pass + + def is_public(x: pdoc.doc.Doc) -> bool: + return bool(ctx["is_public"](x).strip()) + + index = make_index( + all_modules, + is_public, + env.globals["docformat"], + ) + + compile_js = Path(env.get_template("build-search-index.js").filename) # type: ignore + return precompile_index(index, compile_js) + + def repr_module(module: pdoc.doc.Module) -> str: """Renders `repr(pdoc.doc.Module)`, primarily used for tests and debugging.""" with defuse_unsafe_reprs(): @@ -108,6 +128,7 @@ def repr_module(module: pdoc.doc.Module) -> str: _default_searchpath = [ Path(os.environ.get("XDG_CONFIG_HOME", "~/.config")).expanduser() / "pdoc", Path(__file__).parent / "templates", + Path(__file__).parent / "templates" / "default", ] env = Environment( @@ -122,7 +143,7 @@ def repr_module(module: pdoc.doc.Module) -> str: You can modify this object to add custom filters and globals. Examples can be found in this module's source code. """ -env.filters["render_docstring"] = render_docstring +env.filters["render_docstring"] = render_docstring_with_context env.filters["highlight"] = highlight env.filters["linkify"] = linkify env.filters["link"] = link diff --git a/pdoc/render_helpers.py b/pdoc/render_helpers.py index 8c6090cc..69b6a081 100644 --- a/pdoc/render_helpers.py +++ b/pdoc/render_helpers.py @@ -63,12 +63,19 @@ def _markdown(docstring: str) -> str: @contextfilter -def render_docstring(context: Context, docstring: str) -> str: +def render_docstring_with_context(context: Context, docstring: str) -> str: """ Converts `docstring` from a custom docformat to Markdown (if necessary), and then from Markdown to HTML. 
""" module: pdoc.doc.Module = context["module"] - docformat = getattr(module.obj, "__docformat__", context["docformat"]) or "" + docformat: str = context["docformat"] + return render_docstring(docstring, module, docformat) + + +def render_docstring( + docstring: str, module: pdoc.doc.Module, default_docformat: str +) -> str: + docformat = getattr(module.obj, "__docformat__", default_docformat) or "" docstring = docstrings.convert(docstring, docformat, module.source_file) return _markdown(docstring) diff --git a/pdoc/search.py b/pdoc/search.py new file mode 100644 index 00000000..d9647169 --- /dev/null +++ b/pdoc/search.py @@ -0,0 +1,129 @@ +""" +pdoc has a search box which allows users to quickly find relevant parts in the documentation. +This feature is implemented entirely client-side so that pdoc can still be hosted statically, +and works without any third-party services in a privacy-preserving way. When a user focuses the +search box for the first time, pdoc will fetch the search index (`search.json`) and use that to +answer all upcoming queries. + +##### Search Performance + +pdoc uses [Elasticlunr.js](https://github.com/weixsong/elasticlunr.js) to implement search. To improve end user +performance, pdoc will attempt to precompile the search index when building the documentation. This only works if +`nodejs` is available, and pdoc gracefully falls back to client-side index building if this is not the case. + +If your search index reaches a size where compilation times are meaningful and `nodejs` cannot be invoked, +pdoc will let you know and print a notice when building your documentation. In this case it should be enough to install +a recent version of [Node.js](https://nodejs.org/) on your system and make a `nodejs` or `node` available on your PATH. +There are no other additional dependencies. pdoc only uses `node` to interpret a local JS file, it does not download any +additional packages. 
+ +You can test if your search index is precompiled by clicking the search box (so that the search index is fetched) and +then checking your browser's developer console. + +##### Search Index Size + +The search index can be relatively large as it includes all docstrings. For larger projects, you should make sure that +you have [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression) and caching enabled. `search.json` usually +compresses to about 10% of its original size. For example, pdoc's own precompiled search index compresses from 312kB +to 27kB. + +##### Disabling Search + +If you wish to hide the search box, you can add +```html+jinja +{% block search %}{% endblock %} +{% block search_js %}{% endblock %} +``` +in your [`module.html.jinja2` template](../pdoc.html#editing-pdocs-html-template). +""" +from __future__ import annotations + +import json +import shutil +import subprocess +from collections.abc import Callable +from pathlib import Path + +import pdoc.doc +from pdoc.render_helpers import render_docstring + + +def make_index( + all_modules: dict[str, pdoc.doc.Module], + is_public: Callable[[pdoc.doc.Doc], bool], + default_docformat: str, +) -> list[dict]: + """ + This method compiles all currently documented modules into a pile of documentation JSON objects, + which can then be ingested by Elasticlunr.js. + """ + + documents = [] + for modname, mod in all_modules.items(): + + def make_item(doc: pdoc.doc.Doc, **kwargs) -> dict[str, str]: + # TODO: We could be extra fancy here and split `doc.docstring` by toc sections. 
+ return { + "fullname": doc.fullname, + "modulename": doc.modulename, + "qualname": doc.qualname, + "type": doc.type, + "doc": render_docstring(doc.docstring, mod, default_docformat), + **kwargs, + } + + def make_index(mod: pdoc.doc.Namespace): + if not is_public(mod): + return + yield make_item(mod) + for m in mod.own_members: + if isinstance(m, pdoc.doc.Variable) and is_public(m): + yield make_item(m) + elif isinstance(m, pdoc.doc.Function) and is_public(m): + yield make_item( + m, + parameters=list(m.signature.parameters), + funcdef=m.funcdef, + ) + elif isinstance(m, pdoc.doc.Class): + yield from make_index(m) + else: + pass + + documents.extend(make_index(mod)) + + return documents + + +def precompile_index(documents: list[dict], compile_js: Path) -> str: + """ + This method tries to precompile the Elasticlunr.js search index by invoking `nodejs` or `node`. + If that fails, an unprocessed index will be returned (which will be compiled locally on the client side). + If this happens and the index is rather large (>3MB), a warning with precompile instructions is printed. + + We currently require nodejs, but we'd welcome PRs that support other JavaScript runtimes or + – even better – a Python-based search index generation similar to + [elasticlunr-rs](https://github.com/mattico/elasticlunr-rs) that could be shipped as part of pdoc. + """ + raw = json.dumps(documents) + try: + if shutil.which("nodejs"): + executable = "nodejs" + else: + executable = "node" + out = subprocess.check_output( + [executable, compile_js], + input=raw.encode(), + cwd=Path(__file__).parent / "templates", + ) + index = json.loads(out) + index["_isPrebuiltIndex"] = True + except Exception as e: + if len(raw) > 3 * 1024 * 1024: + print( + f"Note: pdoc failed to precompile the search index ({e}). 
" + f"To improve search speed, see https://pdoc.dev/docs/pdoc/search.html" + ) + return raw + else: + return json.dumps(index) diff --git a/pdoc/templates/build-search-index.js b/pdoc/templates/build-search-index.js new file mode 100644 index 00000000..bd6e8b43 --- /dev/null +++ b/pdoc/templates/build-search-index.js @@ -0,0 +1,24 @@ +/** + * This script is invoked by pdoc to precompile the search index. + * Precompiling the search index increases file size, but skips the CPU-heavy index building in the browser. + */ +let elasticlunr = require('./elasticlunr.min'); + +let fs = require('fs'); +let docs = JSON.parse(fs.readFileSync(0, 'utf-8')); + +/* mirrored in module.html.jinja2 (part 1) */ +elasticlunr.tokenizer.setSeperator(/[\s\-.;&]+|<[^>]*>/); + +/* mirrored in module.html.jinja2 (part 2) */ +searchIndex = elasticlunr(function () { + this.addField('qualname'); + this.addField('fullname'); + this.addField('doc'); + this.setRef('fullname'); +}); +for (let doc of docs) { + searchIndex.addDoc(doc); +} + +process.stdout.write(JSON.stringify(searchIndex.toJSON())); diff --git a/pdoc/templates/default/error.html.jinja2 b/pdoc/templates/default/error.html.jinja2 index 87f40429..bc969ecd 100644 --- a/pdoc/templates/default/error.html.jinja2 +++ b/pdoc/templates/default/error.html.jinja2 @@ -2,7 +2,7 @@ {% block title %}{{ error }}{% endblock %} {% block style %} {{ super() | safe }} - {% endblock %} -{% block body %} -