Skip to content

Add Search Bar #255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

# Unreleased: pdoc next

- **Add search functionality.**
pdoc now has a search bar which allows users to quickly
find relevant parts in the documentation.
See https://pdoc.dev/docs/pdoc/search.html for details.
- Redesign module list (index.html.jinja2).
- Update Bootstrap to v5.0.0.
- Do not fail if `inspect.getdoc()` raises.

# 2021-04-30: pdoc 6.6.0

Expand Down
14 changes: 10 additions & 4 deletions pdoc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,12 +381,13 @@ def write(mod: doc.Module):
def write(mod: doc.Module):
retval.write(r(mod))

all_modules = extract.parse_specs(modules)
module_names = extract.parse_specs(modules)
all_modules: dict[str, doc.Module] = {}

if format == "html":

def r(mod: doc.Module) -> str:
return render.html_module(module=mod, all_modules=all_modules)
return render.html_module(module=mod, all_modules=module_names)

elif format == "markdown": # pragma: no cover
raise NotImplementedError(
Expand All @@ -397,15 +398,16 @@ def r(mod: doc.Module) -> str:
else:
raise ValueError(f"Invalid rendering format {format!r}.")

for mod in all_modules:
for mod in module_names:
try:
m = extract.load_module(mod)
except RuntimeError:
warnings.warn(
f"Error importing {mod}:\n{traceback.format_exc()}", RuntimeWarning
)
else:
write(doc.Module(m))
all_modules[mod] = doc.Module(m)
write(all_modules[mod])

if not output_directory:
return retval.getvalue()
Expand All @@ -417,4 +419,8 @@ def r(mod: doc.Module) -> str:
if index:
(output_directory / "index.html").write_bytes(index.encode())

search = render.search_index(all_modules)
if search:
(output_directory / "search.json").write_bytes(search.encode())

return retval.getvalue()
20 changes: 16 additions & 4 deletions pdoc/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ def docstring(self) -> str:

If no docstring can be found, an empty string is returned.
"""
doc = inspect.getdoc(self.obj) or ""
return doc.strip()
return _safe_getdoc(self.obj)

@cached_property
def source(self) -> str:
Expand Down Expand Up @@ -744,8 +743,7 @@ def docstring(self) -> str:
cls = sys.modules.get(_safe_getattr(self.obj, "__module__", None), None)
for name in _safe_getattr(self.obj, "__qualname__", "").split(".")[:-1]:
cls = _safe_getattr(cls, name, None)
doc = inspect.getdoc(_safe_getattr(cls, self.name, None)) or ""
doc = doc.strip()
doc = _safe_getdoc(_safe_getattr(cls, self.name, None))

if doc == object.__init__.__doc__:
# inspect.getdoc(Foo.__init__) returns the docstring, for object.__init__ if left undefined...
Expand Down Expand Up @@ -1040,3 +1038,17 @@ def _safe_getattr(obj, attr, default):
RuntimeWarning,
)
return default


def _safe_getdoc(obj: Any) -> str:
"""Like `inspect.getdoc()`, but never raises. Always returns a stripped string."""
try:
doc = inspect.getdoc(obj) or ""
except Exception as e:
warnings.warn(
f"inspect.getdoc({obj!r}) raised an exception: {e}",
RuntimeWarning,
)
return ""
else:
return doc.strip()
45 changes: 33 additions & 12 deletions pdoc/render.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from __future__ import annotations

import os
import types
from pathlib import Path
from typing import Collection, Mapping, Optional

import jinja2
from jinja2 import Environment, FileSystemLoader

import pdoc.doc
import pdoc.docstrings
from pdoc._compat import Literal
from pdoc.render_helpers import (
DefaultMacroExtension,
Expand All @@ -16,8 +19,9 @@
link,
linkify,
minify_css,
render_docstring,
render_docstring_with_context,
)
from pdoc.search import make_index, precompile_index


def configure(
Expand Down Expand Up @@ -67,9 +71,7 @@ def html_module(
This is only passed by `pdoc.web`.
"""
with defuse_unsafe_reprs():
return env.select_template(
["module.html.jinja2", "default/module.html.jinja2"]
).render(
return env.get_template("module.html.jinja2").render(
module=module,
all_modules=all_modules,
mtime=mtime,
Expand All @@ -81,24 +83,42 @@ def html_module(

def html_index(all_modules: Collection[str]) -> str:
"""Renders the module index."""
return env.select_template(
["index.html.jinja2", "default/index.html.jinja2"]
).render(
return env.get_template("index.html.jinja2").render(
all_modules=[m for m in all_modules if "._" not in m],
)


def html_error(error: str, details: str = "") -> str:
"""Renders an error message."""
return env.select_template(
["index.html.jinja2", "default/error.html.jinja2"]
).render(
return env.get_template("error.html.jinja2").render(
error=error,
details=details,
)


@defuse_unsafe_reprs()
def search_index(all_modules: dict[str, pdoc.doc.Module]) -> str:
    """
    Renders the Elasticlunr.js search index.

    The documents are collected with `make_index` and then handed to `precompile_index`
    together with the path of the `build-search-index.js` template.
    """
    # HACK: to decide whether an object is public and belongs in the index, we borrow
    # `is_public` from the module template's context. The template is rendered once against
    # an empty placeholder module so that the context is populated.
    template: jinja2.Template = env.get_template("module.html.jinja2")
    placeholder = pdoc.doc.Module(types.ModuleType(""))
    render_ctx: jinja2.runtime.Context = template.new_context(
        {"module": placeholder, "all_modules": {}}
    )
    for _chunk in template.root_render_func(render_ctx):  # type: ignore
        # The rendered output is discarded; only the context is of interest.
        pass

    def is_public(x: pdoc.doc.Doc) -> bool:
        # Any non-whitespace output from the template's `is_public` counts as "public".
        rendered = render_ctx["is_public"](x)
        return bool(rendered.strip())

    documents = make_index(all_modules, is_public, env.globals["docformat"])

    script_path = env.get_template("build-search-index.js").filename
    return precompile_index(documents, Path(script_path))  # type: ignore


def repr_module(module: pdoc.doc.Module) -> str:
"""Renders `repr(pdoc.doc.Module)`, primarily used for tests and debugging."""
with defuse_unsafe_reprs():
Expand All @@ -108,6 +128,7 @@ def repr_module(module: pdoc.doc.Module) -> str:
_default_searchpath = [
Path(os.environ.get("XDG_CONFIG_HOME", "~/.config")).expanduser() / "pdoc",
Path(__file__).parent / "templates",
Path(__file__).parent / "templates" / "default",
]

env = Environment(
Expand All @@ -122,7 +143,7 @@ def repr_module(module: pdoc.doc.Module) -> str:
You can modify this object to add custom filters and globals.
Examples can be found in this module's source code.
"""
env.filters["render_docstring"] = render_docstring
env.filters["render_docstring"] = render_docstring_with_context
env.filters["highlight"] = highlight
env.filters["linkify"] = linkify
env.filters["link"] = link
Expand Down
11 changes: 9 additions & 2 deletions pdoc/render_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,19 @@ def _markdown(docstring: str) -> str:


@contextfilter
def render_docstring(context: Context, docstring: str) -> str:
def render_docstring_with_context(context: Context, docstring: str) -> str:
"""
Converts `docstring` from a custom docformat to Markdown (if necessary), and then from Markdown to HTML.
"""
module: pdoc.doc.Module = context["module"]
docformat = getattr(module.obj, "__docformat__", context["docformat"]) or ""
docformat: str = context["docformat"]
return render_docstring(docstring, module, docformat)


def render_docstring(
    docstring: str, module: pdoc.doc.Module, default_docformat: str
) -> str:
    """
    Converts `docstring` to Markdown using the docformat that applies to `module`,
    and then renders the result with `_markdown`.

    A `__docformat__` attribute on `module.obj` takes precedence over `default_docformat`.
    """
    module_docformat = getattr(module.obj, "__docformat__", default_docformat)
    markdown_source = docstrings.convert(
        docstring, module_docformat or "", module.source_file
    )
    return _markdown(markdown_source)

Expand Down
129 changes: 129 additions & 0 deletions pdoc/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""
pdoc has a search box which allows users to quickly find relevant parts in the documentation.
This feature is implemented entirely client-side so that pdoc can still be hosted statically,
and works without any third-party services in a privacy-preserving way. When a user focuses the
search box for the first time, pdoc will fetch the search index (`search.json`) and use that to
answer all upcoming queries.

##### Search Performance

pdoc uses [Elasticlunr.js](https://github.com/weixsong/elasticlunr.js) to implement search. To improve end user
performance, pdoc will attempt to precompile the search index when building the documentation. This only works if
`nodejs` is available, and pdoc gracefully falls back to client-side index building if this is not the case.

If your search index reaches a size where compilation times are meaningful and `nodejs` cannot be invoked,
pdoc will let you know and print a notice when building your documentation. In this case it should be enough to install
a recent version of [Node.js](https://nodejs.org/) on your system and make a `nodejs` or `node` executable available on your PATH.
There are no additional dependencies: pdoc only uses `node` to interpret a local JS file and does not download any
additional packages.

You can test if your search index is precompiled by clicking the search box (so that the search index is fetched) and
then checking your browser's developer console.

##### Search Index Size

The search index can be relatively large as it includes all docstrings. For larger projects, you should make sure that
you have [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression) and caching enabled. `search.json` usually
compresses to about 10% of its original size. For example, pdoc's own precompiled search index compresses from 312kB
to 27kB.

##### Disabling Search

If you wish to hide the search box, you can add
```html+jinja
{% block search %}{% endblock %}
{% block search_js %}{% endblock %}
```
in your [`module.html.jinja2` template](../pdoc.html#editing-pdocs-html-template).
"""
from __future__ import annotations

import json
import shutil
import subprocess
from collections.abc import Callable
from pathlib import Path

import pdoc.doc
from pdoc.render_helpers import render_docstring


def make_index(
    all_modules: dict[str, pdoc.doc.Module],
    is_public: Callable[[pdoc.doc.Doc], bool],
    default_docformat: str,
) -> list[dict]:
    """
    Collect one search document per public object found in `all_modules`.

    Every module is walked recursively: the namespace itself, its own variables and functions,
    and (recursively) its own classes are included, as long as `is_public` accepts them.
    The resulting list of JSON objects can then be ingested by Elasticlunr.js.
    """

    def to_document(doc: pdoc.doc.Doc, owner: pdoc.doc.Module, **extra) -> dict[str, str]:
        # TODO: We could be extra fancy here and split `doc.docstring` by toc sections.
        document = {
            "fullname": doc.fullname,
            "modulename": doc.modulename,
            "qualname": doc.qualname,
            "type": doc.type,
            # Docstrings are always rendered in the context of the top-level module.
            "doc": render_docstring(doc.docstring, owner, default_docformat),
        }
        document.update(extra)
        return document

    def walk(namespace: pdoc.doc.Namespace, owner: pdoc.doc.Module):
        # A non-public namespace hides everything it contains.
        if not is_public(namespace):
            return
        yield to_document(namespace, owner)
        for member in namespace.own_members:
            if isinstance(member, pdoc.doc.Variable) and is_public(member):
                yield to_document(member, owner)
            elif isinstance(member, pdoc.doc.Function) and is_public(member):
                yield to_document(
                    member,
                    owner,
                    parameters=list(member.signature.parameters),
                    funcdef=member.funcdef,
                )
            elif isinstance(member, pdoc.doc.Class):
                # Visibility of the class is checked at the top of the recursive call.
                yield from walk(member, owner)
            # Any other kind of member is not indexed.

    return [
        document
        for module in all_modules.values()
        for document in walk(module, module)
    ]


def precompile_index(documents: list[dict], compile_js: Path) -> str:
    """
    This method tries to precompile the Elasticlunr.js search index by invoking `nodejs` or `node`.
    If that fails, an unprocessed index will be returned (which will be compiled locally on the client side).
    If this happens and the index is rather large (>3MB), a warning with precompile instructions is printed.

    We currently require nodejs, but we'd welcome PRs that support other JavaScript runtimes or
    – even better – a Python-based search index generation similar to
    [elasticlunr-rs](https://github.com/mattico/elasticlunr-rs) that could be shipped as part of pdoc.
    """
    raw = json.dumps(documents)
    try:
        executable = "nodejs" if shutil.which("nodejs") else "node"
        out = subprocess.check_output(
            # The script path is passed as a plain string: on Windows, path-like objects inside
            # the argument list are only accepted by `subprocess` from Python 3.8 onwards, and
            # the resulting TypeError would otherwise be swallowed by the fallback below.
            [executable, str(compile_js)],
            input=raw.encode(),
            # The script loads its JS dependency relative to the templates directory.
            cwd=Path(__file__).parent / "templates",
        )
        index = json.loads(out)
        index["_isPrebuiltIndex"] = True
    except Exception as e:
        # Precompilation is best-effort: any failure falls back to the raw document list.
        # Only mention it if the index is big enough for client-side compilation to hurt.
        if len(raw) > 3 * 1024 * 1024:
            print(
                f"Note: pdoc failed to precompile the search index ({e}). "
                f"To improve search speed, see https://pdoc.dev/docs/pdoc/search.html"
            )
        return raw
    else:
        return json.dumps(index)
24 changes: 24 additions & 0 deletions pdoc/templates/build-search-index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * This script is invoked by pdoc to precompile the search index.
 * Precompiling the search index increases file size, but skips the CPU-heavy index building in the browser.
 *
 * Input: a JSON array of documents on stdin.
 * Output: the serialized Elasticlunr.js index as JSON on stdout.
 */
const elasticlunr = require('./elasticlunr.min');

const fs = require('fs');
// File descriptor 0 is stdin.
const docs = JSON.parse(fs.readFileSync(0, 'utf-8'));

/* mirrored in module.html.jinja2 (part 1) */
// NOTE(review): "setSeperator" appears to be the library's own spelling - confirm before "fixing" it.
elasticlunr.tokenizer.setSeperator(/[\s\-.;&]+|<[^>]*>/);

/* mirrored in module.html.jinja2 (part 2) */
// Declared with `const` so that the script does not create an implicit global.
const searchIndex = elasticlunr(function () {
    this.addField('qualname');
    this.addField('fullname');
    this.addField('doc');
    this.setRef('fullname');
});
for (const doc of docs) {
    searchIndex.addDoc(doc);
}

process.stdout.write(JSON.stringify(searchIndex.toJSON()));
2 changes: 1 addition & 1 deletion pdoc/templates/default/error.html.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{% block title %}{{ error }}{% endblock %}
{% block style %}
{{ super() | safe }}
<style type="text/css">
<style>
body {
padding: 2rem;
}
Expand Down
Loading