Skip to content

Commit e3f944f

Browse files
authored
Fix RST edge cases to better support polars (#44)
* Fix a number of edge cases for polars docstrings * Bump version
1 parent f12c337 commit e3f944f

File tree

3 files changed

+161
-7
lines changed

3 files changed

+161
-7
lines changed

docstring_to_markdown/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if TYPE_CHECKING:
77
from importlib_metadata import EntryPoint
88

9-
__version__ = "0.16"
9+
__version__ = "0.17"
1010

1111

1212
class UnknownFormatError(Exception):

docstring_to_markdown/rst.py

+34-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from abc import ABC, abstractmethod
22
from enum import IntEnum, auto
3+
from textwrap import dedent
34
from types import SimpleNamespace
45
from typing import Callable, Match, Union, List, Dict
56
import re
@@ -299,8 +300,8 @@ def inline_markdown(self):
299300
SECTION_DIRECTIVES: Dict[str, List[Directive]] = {
300301
'Parameters': [
301302
Directive(
302-
pattern=r'^(?P<other_args>\*\*kwargs|\*args)$',
303-
replacement=r'- `\g<other_args>`'
303+
pattern=r'^(?P<other_args>(\w[\w\d_\.]*)|\*\*kwargs|\*args)$',
304+
replacement=r'- `\g<other_args>`:'
304305
),
305306
Directive(
306307
pattern=r'^(?P<arg1>[^:\s]+\d), (?P<arg2>[^:\s]+\d), \.\.\. : (?P<type>.+)$',
@@ -336,6 +337,7 @@ def _find_directive_pattern(name: str):
336337

337338

338339
def looks_like_rst(value: str) -> bool:
340+
value = dedent(value)
339341
# check if any of the characteristic sections (and the properly formatted underline) is there
340342
for section in _RST_SECTIONS:
341343
if (section + '\n' + '-' * len(section) + '\n') in value:
@@ -542,10 +544,20 @@ class BlockParser(IParser):
542544
follower: Union['IParser', None] = None
543545
_buffer: List[str]
544546
_block_started: bool
547+
_indent: Union[int, None]
548+
should_measure_indent = True
545549

546550
def __init__(self):
547551
self._buffer = []
548552
self._block_started = False
553+
self._indent = None
554+
555+
def measure_indent(self, line: str):
556+
line_indent = len(line) - len(line.lstrip())
557+
if self._indent is None:
558+
self._indent = line_indent
559+
else:
560+
self._indent = min(line_indent, self._indent)
549561

550562
@abstractmethod
551563
def can_parse(self, line: str) -> bool:
@@ -558,24 +570,33 @@ def _start_block(self, language: str):
558570
def consume(self, line: str):
559571
if not self._block_started:
560572
raise ValueError('Block has not started') # pragma: no cover
573+
if self.should_measure_indent:
574+
self.measure_indent(line)
561575
self._buffer.append(line)
562576

563577
def finish_consumption(self, final: bool) -> str:
564578
# if the last line is empty (e.g. a separator of indented block), discard it
565579
if self._buffer[len(self._buffer) - 1].strip() == '':
566580
self._buffer.pop()
567581
self._buffer.append(self.enclosure + '\n')
568-
result = '\n'.join(self._buffer)
582+
indent = " " * (self._indent or 0)
583+
intermediate = '\n'.join(self._buffer)
584+
result = '\n'.join([
585+
(indent + line) if line else line
586+
for line in intermediate.splitlines()
587+
]) if indent else intermediate
569588
if not final:
570589
result += '\n'
571590
self._buffer = []
572591
self._block_started = False
592+
self._indent = None
573593
return result
574594

575595

576596
class IndentedBlockParser(BlockParser, ABC):
577597
_is_block_beginning: bool
578598
_block_indent_size: Union[int, None]
599+
should_measure_indent = False
579600

580601
def __init__(self):
581602
super(IndentedBlockParser, self).__init__()
@@ -599,6 +620,7 @@ def consume(self, line: str):
599620
return
600621
if self._block_indent_size is None:
601622
self._block_indent_size = len(line) - len(line.lstrip())
623+
self.measure_indent(line)
602624
super().consume(line[self._block_indent_size:])
603625

604626
def finish_consumption(self, final: bool) -> str:
@@ -684,6 +706,7 @@ def can_parse(self, line: str):
684706
return line.strip() in self.directives
685707

686708
def initiate_parsing(self, line: str, current_language: str):
709+
self.measure_indent(line)
687710
admonition = self.directives[line.strip()]
688711
self._start_block(f'\n{admonition.block_markdown}\n')
689712
return IBlockBeginning(remainder='')
@@ -694,6 +717,7 @@ def can_parse(self, line: str) -> bool:
694717
return re.match(CODE_BLOCK_PATTERN, line) is not None
695718

696719
def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning:
720+
self.measure_indent(line)
697721
match = re.match(CODE_BLOCK_PATTERN, line)
698722
# already checked in can_parse
699723
assert match
@@ -753,6 +777,8 @@ def rst_to_markdown(text: str, extract_signature: bool = True) -> str:
753777
most_recent_section: Union[str, None] = None
754778
is_first_line = True
755779

780+
text = dedent(text)
781+
756782
def flush_buffer():
757783
nonlocal lines_buffer
758784
lines = '\n'.join(lines_buffer)
@@ -766,7 +792,8 @@ def flush_buffer():
766792
lines_buffer = []
767793
return lines
768794

769-
for line in text.split('\n'):
795+
lines = text.split('\n')
796+
for i, line in enumerate(lines):
770797
if is_first_line:
771798
if extract_signature:
772799
signature_match = re.match(r'^(?P<name>\S+)\((?P<params>.*)\)$', line)
@@ -809,7 +836,9 @@ def flush_buffer():
809836
else:
810837
if most_recent_section in SECTION_DIRECTIVES:
811838
for section_directive in SECTION_DIRECTIVES[most_recent_section]:
812-
if re.match(section_directive.pattern, trimmed_line):
839+
next_line = lines[i + 1] if i + 1 < len(lines) else ""
840+
is_next_line_section = set(next_line.strip()) == {"-"}
841+
if re.match(section_directive.pattern, line) and not is_next_line_section:
813842
line = re.sub(section_directive.pattern, section_directive.replacement, trimmed_line)
814843
break
815844
if trimmed_line.rstrip() in RST_SECTIONS:

tests/test_rst.py

+126-1
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def func(): pass
337337
338338
- `x`: array_like
339339
Input array.
340-
- `**kwargs`
340+
- `**kwargs`:
341341
For other keyword-only arguments, see the ufunc docs.
342342
"""
343343

@@ -638,6 +638,119 @@ def func(): pass
638638
"""
639639

640640

641+
# this format is often used by polars
642+
PARAMETERS_WITHOUT_TYPE = """
643+
Parameters
644+
----------
645+
source
646+
Path(s) to a file or directory
647+
When needing to authenticate for scanning cloud locations, see the
648+
`storage_options` parameter.
649+
columns
650+
Columns to select. Accepts a list of column indices (starting at zero) or a list
651+
of column names.
652+
n_rows
653+
Stop reading from parquet file after reading `n_rows`.
654+
Only valid when `use_pyarrow=False`.
655+
656+
Returns
657+
-------
658+
DataFrame
659+
"""
660+
661+
PARAMETERS_WITHOUT_TYPE_MARKDOWN = """
662+
#### Parameters
663+
664+
- `source`:
665+
Path(s) to a file or directory
666+
When needing to authenticate for scanning cloud locations, see the
667+
`storage_options` parameter.
668+
- `columns`:
669+
Columns to select. Accepts a list of column indices (starting at zero) or a list
670+
of column names.
671+
- `n_rows`:
672+
Stop reading from parquet file after reading `n_rows`.
673+
Only valid when `use_pyarrow=False`.
674+
675+
#### Returns
676+
677+
DataFrame
678+
"""
679+
680+
INDENTED_DOCSTRING = """
681+
Parameters
682+
----------
683+
glob
684+
Expand path given via globbing rules.
685+
"""
686+
687+
INDENTED_DOCSTRING_MARKDOWN = """
688+
#### Parameters
689+
690+
- `glob`:
691+
Expand path given via globbing rules.
692+
"""
693+
694+
695+
WARNINGS_IN_PARAMETERS = """
696+
Parameters
697+
----------
698+
glob
699+
Expand path given via globbing rules.
700+
schema
701+
Specify the datatypes of the columns. The datatypes must match the
702+
datatypes in the file(s). If there are extra columns that are not in the
703+
file(s), consider also enabling `allow_missing_columns`.
704+
705+
.. warning::
706+
This functionality is considered **unstable**. It may be changed
707+
at any point without it being considered a breaking change.
708+
hive_schema
709+
The column names and data types of the columns by which the data is partitioned.
710+
If set to `None` (default), the schema of the Hive partitions is inferred.
711+
712+
.. warning::
713+
This functionality is considered **unstable**. It may be changed
714+
at any point without it being considered a breaking change.
715+
try_parse_hive_dates
716+
Whether to try parsing hive values as date/datetime types.
717+
"""
718+
719+
720+
WARNINGS_IN_PARAMETERS_MARKDOWN = """
721+
#### Parameters
722+
723+
- `glob`:
724+
Expand path given via globbing rules.
725+
- `schema`:
726+
Specify the datatypes of the columns. The datatypes must match the
727+
datatypes in the file(s). If there are extra columns that are not in the
728+
file(s), consider also enabling `allow_missing_columns`.
729+
730+
731+
---
732+
⚠️ **Warning**
733+
734+
This functionality is considered **unstable**. It may be changed
735+
at any point without it being considered a breaking change.
736+
737+
---
738+
- `hive_schema`:
739+
The column names and data types of the columns by which the data is partitioned.
740+
If set to `None` (default), the schema of the Hive partitions is inferred.
741+
742+
743+
---
744+
⚠️ **Warning**
745+
746+
This functionality is considered **unstable**. It may be changed
747+
at any point without it being considered a breaking change.
748+
749+
---
750+
- `try_parse_hive_dates`:
751+
Whether to try parsing hive values as date/datetime types.
752+
"""
753+
641754
NESTED_PARAMETERS = """
642755
Parameters
643756
----------
@@ -887,6 +1000,18 @@ def foo():
8871000
'rst': NESTED_PARAMETERS,
8881001
'md': NESTED_PARAMETERS_MARKDOWN
8891002
},
1003+
'converts parameter without type': {
1004+
'rst': PARAMETERS_WITHOUT_TYPE,
1005+
'md': PARAMETERS_WITHOUT_TYPE_MARKDOWN
1006+
},
1007+
'converts indented parameters lists': {
1008+
'rst': INDENTED_DOCSTRING,
1009+
'md': INDENTED_DOCSTRING_MARKDOWN
1010+
},
1011+
'converts warnings in parameters lists': {
1012+
'rst': WARNINGS_IN_PARAMETERS,
1013+
'md': WARNINGS_IN_PARAMETERS_MARKDOWN
1014+
},
8901015
'converts sphinx signatures': {
8911016
'rst': SPHINX_SIGNATURE,
8921017
'md': SPHINX_SIGNATURE_MARKDOWN

0 commit comments

Comments
 (0)