Skip to content

Commit

Permalink
Parse attributes for Sphinx-style docstrings (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsh9 authored Jun 24, 2024
1 parent 8f95747 commit af6c3bb
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-20.04, macOS-10.15, windows-2019]
python-version: [3.6, 3.7, 3.8, 3.9, "3.10", 3.11, 3.12.0-rc.2]
python-version: [3.8, 3.9, "3.10", 3.11, 3.12.0-rc.2]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
11 changes: 9 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# (Fork) 0.0.8 (2024-06-23)

- Added support for parsing attributes from Sphinx-style docstrings
- Dropped support for Python 3.6 because it doesn't support data classes

# (Fork) 0.0.7 (2024-06-22)

- Made "Attributes" a separate section from "Parameters" (for Google, Numpy, and Sphinx
styles)
styles)

# (Fork) 0.0.6 (2024-06-22)

Expand All @@ -24,6 +29,8 @@
# (Fork) 0.0.3 (2023-08-28)

- Google, Numpy, Sphinx: Make "Yields" an official parsed section (`DocstringYields`)
- This corresponds to a PR in the upstream repo that has been open
since June 2023 (https://github.com/rr-/docstring_parser/pull/79)


# (Fork) 0.0.2 (2023-08-26)
Expand All @@ -33,7 +40,7 @@

# (Fork) 0.0.1 (2023-08-18)

- Google: Fixed a bug where union style return types (such as `int | str`) are not parsed correctly
- Google: Fixed a bug where union style return types (such as `int | str`) are not parsed correctly (https://github.com/rr-/docstring_parser/issues/81)

# 0.15 (2022-09-05)

Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
docstring_parser_fork
================

This is a fork of [docstring_parser](https://github.com/rr-/docstring_parser). The reason I'm forking that is to quickly get some bug fixes out for users of [pydoclint](https://github.com/jsh9/pydoclint).
This is a fork of [docstring_parser](https://github.com/rr-/docstring_parser).

This fork fixes bugs that the upstream library has not fixed, and it also
offers additional functionality. To inspect the differences between this
fork and the upstream, go to [CHANGELOG.md](./CHANGELOG.md) and read the
entries that start with "(Fork)".

------

Expand Down
40 changes: 33 additions & 7 deletions docstring_parser/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from .common import (
DEPRECATION_KEYWORDS,
ATTR_KEYWORDS,
PARAM_KEYWORDS,
RAISES_KEYWORDS,
RETURNS_KEYWORDS,
Expand All @@ -24,11 +23,13 @@
RenderingStyle,
)

from docstring_parser.rest_attr_parser import Attribute, parse_attributes


def _build_meta(args: T.List[str], desc: str) -> DocstringMeta:
key = args[0]

if key in PARAM_KEYWORDS | ATTR_KEYWORDS:
if key in PARAM_KEYWORDS:
if len(args) == 3:
key, type_name, arg_name = args
if type_name.endswith("?"):
Expand All @@ -48,11 +49,7 @@ def _build_meta(args: T.List[str], desc: str) -> DocstringMeta:
match = re.match(r".*defaults to (.+)", desc, flags=re.DOTALL)
default = match.group(1).rstrip(".") if match else None

DocstringSectionType = (
DocstringParam if key in PARAM_KEYWORDS else DocstringAttr
)

return DocstringSectionType(
return DocstringParam(
args=args,
description=desc,
arg_name=arg_name,
Expand Down Expand Up @@ -133,6 +130,21 @@ def parse(text: str) -> Docstring:
return ret

text = inspect.cleandoc(text)

parsed_attrs: T.List[Attribute]
line_nums_with_attrs: T.List[int]
parsed_attrs, line_nums_with_attrs = parse_attributes(text)

# Exclude lines with attributes, because they can interfere with
# other contents
text_lines: T.List[str] = text.split('\n')
lines_without_attr = []
for i, line in enumerate(text_lines):
if i not in line_nums_with_attrs:
lines_without_attr.append(line)

text = '\n'.join(lines_without_attr)

match = re.search("^:", text, flags=re.M)
if match:
desc_chunk = text[: match.start()]
Expand Down Expand Up @@ -201,6 +213,20 @@ def parse(text: str) -> Docstring:
)
)



ret.meta.extend([
DocstringAttr(
args=['attr', _.name],
description=_.description,
arg_name=_.name,
type_name=_.type,
is_optional=None,
default=None,
)
for _ in parsed_attrs
])

return ret


Expand Down
116 changes: 116 additions & 0 deletions docstring_parser/rest_attr_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Parser for attributes in ReST-style docstrings"""
from typing import List, Optional, Tuple
from dataclasses import dataclass

@dataclass
class Attribute:
    """One attribute parsed from a ``.. attribute ::`` block of a docstring.

    Instances are created by ``parse_attribute_block`` and compared by value
    (dataclass-generated ``__eq__``), which is what the tests rely on.
    """

    # Text that follows ``.. attribute ::`` on the directive line, stripped.
    name: str
    # Text that follows ``:type:`` inside the block, stripped; None when the
    # block has no ``:type:`` line.  NOTE: the field name shadows the builtin
    # ``type`` inside the class body; it is kept because callers read ``.type``.
    type: Optional[str] = None
    # Description lines joined with newlines and stripped; None when the block
    # has no description lines.
    description: Optional[str] = None


def parse_attributes(docstring: str) -> Tuple[List[Attribute], List[int]]:
    """Extract every ``.. attribute ::`` block from a ReST-style docstring.

    A block starts at a line whose stripped text begins with
    ``.. attribute ::`` and ends at whichever of these comes first:

    * the next ``.. attribute ::`` line (which starts a new block),
    * two consecutive blank lines,
    * a non-blank line indented no deeper than the directive line, such as
      a following ``:param ...:`` field (a bare ``:type:`` line is still
      accepted at the directive's own indentation),
    * the end of the docstring.

    Args:
        docstring: The docstring text; it is split on ``'\\n'``.

    Returns:
        A tuple ``(attributes, line_numbers)``.  ``attributes`` holds one
        ``Attribute`` per block, in order of appearance.  ``line_numbers``
        holds the 0-based indices (into ``docstring.split('\\n')``) that
        ``parse_attribute_block`` reports as belonging to an attribute, so
        that the caller can remove those lines before further parsing.
    """
    directive = ".. attribute ::"
    type_field = ":type:"

    attributes: List[Attribute] = []
    all_line_nums_with_attr: List[int] = []

    # Lines (and their global indices) of the block being collected.  The
    # buffer is non-empty exactly while we are inside a block.
    block_lines: List[str] = []
    block_line_nums: List[int] = []
    block_indent = 0

    def flush_block() -> None:
        """Parse the buffered block, if any, and empty the buffer."""
        if not block_lines:
            return

        attr, line_nums_with_actual_attr = parse_attribute_block(
            block_lines, block_line_nums
        )
        attributes.append(attr)
        all_line_nums_with_attr.extend(line_nums_with_actual_attr)
        block_lines.clear()
        block_line_nums.clear()

    for i, line in enumerate(docstring.split('\n')):
        stripped_line = line.strip()

        if stripped_line.startswith(directive):
            # A new directive implicitly closes the previous block.
            flush_block()
            block_indent = len(line) - len(line.lstrip())
            block_lines.append(line)
            block_line_nums.append(i)
            continue

        if not block_lines:
            # Outside any attribute block: nothing to collect.
            continue

        if stripped_line:
            indent = len(line) - len(line.lstrip())
            if (
                indent <= block_indent
                and not stripped_line.startswith(type_field)
            ):
                # Dedented content (e.g. ":param x:") ends the block, so
                # that its indented continuation lines are not mistaken
                # for part of the attribute's description.
                flush_block()
                continue
        elif not block_lines[-1].strip():
            # Second consecutive blank line: the block is over.  The blank
            # line itself is deliberately NOT buffered; buffering it would
            # later be parsed as a block of its own and yield a spurious
            # attribute whose name is None.
            flush_block()
            continue

        block_lines.append(line)
        block_line_nums.append(i)

    # The docstring may end while a block is still open.
    flush_block()

    return attributes, all_line_nums_with_attr


def parse_attribute_block(
        lines: List[str],
        global_line_nums: List[int],
) -> Tuple[Attribute, List[int]]:
    """Parse the lines of a single ``.. attribute ::`` block.

    Args:
        lines: Raw lines of the block.  Must be non-empty; the indentation
            of the first line is the baseline against which description
            lines are recognised.
        global_line_nums: For each entry of ``lines``, its line index in
            the full docstring (the two lists are iterated in lockstep).

    Returns:
        A tuple ``(attribute, line_nums)``.  ``attribute`` carries the name
        taken from the directive line, the type taken from a ``:type:``
        line (``None`` if absent) and the description (``None`` if there
        are no description lines).  ``line_nums`` lists the entries of
        ``global_line_nums`` whose lines contributed to the attribute.
    """
    directive = ".. attribute ::"
    type_field = ":type:"

    def indent_of(text: str) -> int:
        """Return the number of leading whitespace characters of `text`."""
        return len(text) - len(text.lstrip())

    name = None
    type_ = None
    description: List[str] = []
    line_nums_with_actual_attr: List[int] = []

    # Indentation of the first line is the reference level for the block.
    base_indent_level = indent_of(lines[0])

    for line_num, line in zip(global_line_nums, lines):
        stripped_line = line.strip()

        if stripped_line.startswith(directive):
            name = stripped_line[len(directive):].strip()
        elif stripped_line.startswith(type_field):
            type_ = stripped_line[len(type_field):].strip()
        elif (
            indent_of(line) > base_indent_level
            and (stripped_line or description)
        ):
            # Only lines indented deeper than the directive can be part of
            # the description.  Among those, whitespace-only lines are kept
            # only once the description has started (i.e. is non-empty).
            description.append(stripped_line)
        else:
            # The line does not belong to the attribute.
            continue

        line_nums_with_actual_attr.append(line_num)

    # Joining then stripping removes leading/trailing empty lines.
    description_text = '\n'.join(description).strip() if description else None

    attr = Attribute(name=name, type=type_, description=description_text)

    return attr, line_nums_with_actual_attr
8 changes: 6 additions & 2 deletions docstring_parser/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ def test_rest() -> None:
:param spam: spam desc
:param int bla: bla desc
:param str yay:
:attr hello: hello world
:type hello: bool
.. attribute :: hello
:type: bool
hello world
:raises ValueError: exc desc
:returns tuple: ret desc
"""
Expand Down
114 changes: 114 additions & 0 deletions docstring_parser/tests/test_rest_attr_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from typing import List

import pytest

from docstring_parser.rest_attr_parser import parse_attributes, Attribute


# NOTE(review): the multi-line string literals below look like they have lost
# their blank lines and indentation (e.g. in the third case the first
# ``.. attribute ::`` line is at index 2 of the literal, while the expected
# line numbers start at 3, and descriptions are only recognised on lines
# indented deeper than the directive).  Confirm against the repository
# before relying on these cases.
@pytest.mark.parametrize(
    'docstring, expected_attributes, expected_lines_with_attributes',
    [
        # Empty docstring: nothing to parse.
        (
            '',
            [],
            [],
        ),
        # Docstring without any ``.. attribute ::`` directive.
        (
            """
My Class
:param name: My name
:type name: str
""",
            [],
            [],
        ),
        # Several attribute blocks followed by parameter fields.
        (
            """
My Class
.. attribute :: attr_1
:type: str
.. attribute :: attr_2
:type: bool
Attr 2
.. attribute :: attr_3
Attr 3
.. attribute :: attr_4
.. attribute :: attr_5
.. attribute :: attr_6
:type: dict | list
!
:param name: My name
:type name: str
""",
            [
                Attribute(name='attr_1', type='str', description=None),
                Attribute(name='attr_2', type='bool', description='Attr 2'),
                Attribute(name='attr_3', type=None, description='Attr 3'),
                Attribute(name='attr_4', type=None, description=None),
                Attribute(name='attr_5', type=None, description=None),
                Attribute(name='attr_6', type='dict | list', description='!'),
            ],
            [3, 4, 6, 7, 9, 11, 13, 15, 16, 17, 18, 20],
        ),
        # Attribute blocks interleaved with parameter fields.
        (
            """
My Class
.. attribute :: attr_1
:type: str
.. attribute :: attr_2
:type: bool
Attr 2
:param bar: A param called "bar"
:type name: float
.. attribute :: attr_3
Attr 3
:param goo: A param called "goo"
:type name: bool
.. attribute :: attr_4
.. attribute :: attr_5
:param foo: A param called "foo"
:type name: float
.. attribute :: attr_6
:type: dict | list
!
:param name: My name
:type name: str
""",
            [
                Attribute(name='attr_1', type='str', description=None),
                Attribute(name='attr_2', type='bool', description='Attr 2'),
                Attribute(name='attr_3', type=None, description='Attr 3'),
                Attribute(name='attr_4', type=None, description=None),
                Attribute(name='attr_5', type=None, description=None),
                Attribute(name='attr_6', type='dict | list', description='!'),
            ],
            [3, 4, 6, 7, 9, 14, 16, 20, 21, 24, 25, 27],
        ),
    ],
)
def test_parser_attributes(
        docstring: str,
        expected_attributes: List[Attribute],
        expected_lines_with_attributes: List[int],
) -> None:
    """Check both outputs of ``parse_attributes`` for each docstring case.

    The first output is compared with the expected ``Attribute`` objects,
    the second with the expected 0-based line numbers.
    """
    attributes, lines_with_attributes = parse_attributes(docstring)
    assert attributes == expected_attributes
    assert lines_with_attributes == expected_lines_with_attributes
Loading

0 comments on commit af6c3bb

Please sign in to comment.