Parse attributes for Sphinx-style docstrings (#7)

jsh9 · Jun 24, 2024 · af6c3bb · af6c3bb
1 parent 8f95747
commit af6c3bb
Show file tree

Hide file tree

Showing 8 changed files with 287 additions and 16 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-20.04, macOS-10.15, windows-2019]
-        python-version: [3.6, 3.7, 3.8, 3.9, "3.10", 3.11, 3.12.0-rc.2]
+        python-version: [3.8, 3.9, "3.10", 3.11, 3.12.0-rc.2]
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,12 @@
+# (Fork) 0.0.8 (2024-06-23)
+
+- Added support for parsing attributes from Sphinx-style docstrings
+- Dropped support for Python 3.6 because it doesn't support data classes (Python 3.7 was also removed from the CI test matrix, which now starts at Python 3.8)
+
 # (Fork) 0.0.7 (2024-06-22)
 
 - Made "Attributes" a separate section from "Parameters" (for Google, Numpy, and Sphinx
-  styles) 
+  styles)
 
 # (Fork) 0.0.6 (2024-06-22)
 
@@ -24,6 +29,8 @@
 # (Fork) 0.0.3 (2023-08-28)
 
 - Google, Numpy, Sphinx: Make "Yields" an official parsed section (`DocstringYields`)
+  - This corresponds to a PR in the upstream repo that has been open
+    since June 2023 (https://github.com/rr-/docstring_parser/pull/79)
 
 
 # (Fork) 0.0.2 (2023-08-26)
@@ -33,7 +40,7 @@
 
 # (Fork) 0.0.1 (2023-08-18)
 
-- Google: Fixed a bug where union style return types (such as `int | str`) are not parsed correctly
+- Google: Fixed a bug where union style return types (such as `int | str`) are not parsed correctly (https://github.com/rr-/docstring_parser/issues/81)
 
 # 0.15 (2022-09-05)
 

diff --git a/README.md b/README.md
@@ -1,7 +1,12 @@
 docstring_parser_fork
 ================
 
-This is a fork of [docstring_parser](https://github.com/rr-/docstring_parser). The reason I'm forking that is to quickly get some bug fixes out for users of [pydoclint](https://github.com/jsh9/pydoclint).
+This is a fork of [docstring_parser](https://github.com/rr-/docstring_parser).
+
+This fork fixes bugs that the upstream library has not fixed, and it also
+offers additional functionality. To inspect the differences between this
+fork and the upstream, go to [CHANGELOG.md](./CHANGELOG.md) and read the
+entries that start with "(Fork)".
 
 ------
 

diff --git a/docstring_parser/rest.py b/docstring_parser/rest.py
@@ -6,7 +6,6 @@
 
 from .common import (
     DEPRECATION_KEYWORDS,
-    ATTR_KEYWORDS,
     PARAM_KEYWORDS,
     RAISES_KEYWORDS,
     RETURNS_KEYWORDS,
@@ -24,11 +23,13 @@
     RenderingStyle,
 )
 
+from docstring_parser.rest_attr_parser import Attribute, parse_attributes
+
 
 def _build_meta(args: T.List[str], desc: str) -> DocstringMeta:
     key = args[0]
 
-    if key in PARAM_KEYWORDS | ATTR_KEYWORDS:
+    if key in PARAM_KEYWORDS:
         if len(args) == 3:
             key, type_name, arg_name = args
             if type_name.endswith("?"):
@@ -48,11 +49,7 @@ def _build_meta(args: T.List[str], desc: str) -> DocstringMeta:
         match = re.match(r".*defaults to (.+)", desc, flags=re.DOTALL)
         default = match.group(1).rstrip(".") if match else None
 
-        DocstringSectionType = (
-            DocstringParam if key in PARAM_KEYWORDS else DocstringAttr
-        )
-
-        return DocstringSectionType(
+        return DocstringParam(
             args=args,
             description=desc,
             arg_name=arg_name,
@@ -133,6 +130,21 @@ def parse(text: str) -> Docstring:
         return ret
 
     text = inspect.cleandoc(text)
+
+    parsed_attrs: T.List[Attribute]
+    line_nums_with_attrs: T.List[int]
+    parsed_attrs, line_nums_with_attrs = parse_attributes(text)
+
+    # Exclude lines with attributes, because they can interfere with
+    # other contents
+    text_lines: T.List[str] = text.split('\n')
+    lines_without_attr = []
+    for i, line in enumerate(text_lines):
+        if i not in line_nums_with_attrs:
+            lines_without_attr.append(line)
+
+    text = '\n'.join(lines_without_attr)
+
     match = re.search("^:", text, flags=re.M)
     if match:
         desc_chunk = text[: match.start()]
@@ -201,6 +213,20 @@ def parse(text: str) -> Docstring:
                 )
             )
 
+
+
+    ret.meta.extend([
+        DocstringAttr(
+            args=['attr', _.name],
+            description=_.description,
+            arg_name=_.name,
+            type_name=_.type,
+            is_optional=None,
+            default=None,
+        )
+        for _ in parsed_attrs
+    ])
+
     return ret
 
 

diff --git a/docstring_parser/rest_attr_parser.py b/docstring_parser/rest_attr_parser.py
@@ -0,0 +1,116 @@
+"""Parser for attributes in ReST-style docstrings"""
+from typing import List, Optional, Tuple
+from dataclasses import dataclass
+
+@dataclass
+class Attribute:
+    """One attribute parsed from a ``.. attribute ::`` block of a docstring."""
+    # Text that follows the ``.. attribute ::`` marker on the directive line
+    name: str
+    # Text that follows ``:type:`` inside the block; None if there is no
+    # such line
+    type: Optional[str] = None
+    # Description lines of the block joined with newlines; None if the
+    # block has no description
+    description: Optional[str] = None
+
+
+def parse_attributes(docstring: str) -> Tuple[List[Attribute], List[int]]:
+    """Find and parse every ``.. attribute ::`` block in a docstring.
+
+    A block starts at a line whose stripped text begins with
+    ``.. attribute ::``.  It ends right before the next such line, at the
+    second of two consecutive blank lines, or at the end of the docstring.
+    Each block is passed to ``parse_attribute_block``, which decides which
+    of the collected lines really belong to the attribute.
+
+    :param docstring: The docstring text to scan
+    :returns: The parsed attributes in order of appearance, and the 0-based
+        indices of the docstring lines that ``parse_attribute_block``
+        reported as part of an attribute
+    """
+    attributes: List[Attribute] = []
+    all_line_nums_with_attr: List[int] = []
+
+    def close_block(
+            block_lines: List[str],
+            block_line_nums: List[int],
+    ) -> None:
+        # Parse one collected block and record its results
+        attr, line_nums_with_actual_attr = parse_attribute_block(
+            block_lines, block_line_nums
+        )
+        attributes.append(attr)
+        all_line_nums_with_attr.extend(line_nums_with_actual_attr)
+
+    # Lines of the block being collected and their indices in the docstring.
+    # Both are empty whenever we are outside of an attribute block.
+    current_attr_lines: List[str] = []
+    current_attr_line_nums: List[int] = []
+
+    for i, line in enumerate(docstring.split('\n')):
+        stripped_line = line.strip()
+
+        if stripped_line.startswith(".. attribute ::"):
+            # A new directive implicitly ends the previous block
+            if current_attr_lines:
+                close_block(current_attr_lines, current_attr_line_nums)
+
+            current_attr_lines = [line]
+            current_attr_line_nums = [i]
+        elif current_attr_lines:
+            if not stripped_line and not current_attr_lines[-1].strip():
+                # The previous line was blank too: two blank lines in a row
+                # end the block.  This blank line must not be kept as the
+                # start of a new block, otherwise a bogus attribute whose
+                # name is None would be emitted later.
+                close_block(current_attr_lines, current_attr_line_nums)
+                current_attr_lines = []
+                current_attr_line_nums = []
+                continue
+
+            current_attr_lines.append(line)
+            current_attr_line_nums.append(i)
+
+    if current_attr_lines:
+        close_block(current_attr_lines, current_attr_line_nums)
+
+    return attributes, all_line_nums_with_attr
+
+
+def parse_attribute_block(
+        lines: List[str],
+        global_line_nums: List[int],
+) -> Tuple[Attribute, List[int]]:
+    """Parse the lines of one ``.. attribute ::`` block.
+
+    The indentation of the first line is taken as the base level.  Lines
+    indented deeper than that form the description.  A ``:type:`` line
+    gives the type.  The first non-blank line that is neither of those and
+    is not indented deeper than the base level ends the attribute: it and
+    everything after it (for example a following ``:param ...:`` field and
+    its indented continuation lines) are left untouched.
+
+    :param lines: The lines of the block; must not be empty, and the first
+        one is expected to be the ``.. attribute ::`` line
+    :param global_line_nums: For each entry of ``lines``, its line index in
+        the whole docstring
+    :returns: The parsed attribute, and the entries of ``global_line_nums``
+        whose lines were used for the name, the type, or the description
+    """
+    name: Optional[str] = None
+    type_: Optional[str] = None
+    description: List[str] = []
+    description_started = False
+
+    line_nums_with_actual_attr: List[int] = []
+
+    # Get the base indentation level from the first line
+    base_indent_level = len(lines[0]) - len(lines[0].lstrip())
+
+    for j, line in zip(global_line_nums, lines):
+        stripped_line = line.strip()
+        current_indent_level = len(line) - len(line.lstrip())
+
+        if stripped_line.startswith(".. attribute ::"):
+            name = stripped_line[len(".. attribute ::"):].strip()
+            line_nums_with_actual_attr.append(j)
+        elif stripped_line.startswith(":type:"):
+            type_ = stripped_line[len(":type:"):].strip()
+            line_nums_with_actual_attr.append(j)
+        elif current_indent_level > base_indent_level:
+            # Deeper-indented lines are description text.  Blank ones are
+            # skipped until the first non-blank description line is seen.
+            if stripped_line or description_started:
+                description_started = True
+                description.append(stripped_line)
+                line_nums_with_actual_attr.append(j)
+        elif stripped_line:
+            # A non-blank line back at (or left of) the base indentation
+            # ends the directive content.  Stop here so that later indented
+            # lines are not mistaken for this attribute's description.
+            break
+
+    # Clean up the description, removing leading/trailing empty lines
+    description_text = '\n'.join(description).strip() if description else None
+
+    attr = Attribute(name=name, type=type_, description=description_text)
+
+    return attr, line_nums_with_actual_attr
diff --git a/docstring_parser/tests/test_parser.py b/docstring_parser/tests/test_parser.py
@@ -20,8 +20,12 @@ def test_rest() -> None:
         :param spam: spam desc
         :param int bla: bla desc
         :param str yay:
-        :attr hello: hello world
-        :type hello: bool
+
+        .. attribute :: hello
+            :type: bool
+            
+            hello world
+
         :raises ValueError: exc desc
         :returns tuple: ret desc
         """

diff --git a/docstring_parser/tests/test_rest_attr_parser.py b/docstring_parser/tests/test_rest_attr_parser.py
@@ -0,0 +1,114 @@
+from typing import List
+
+import pytest
+
+from docstring_parser.rest_attr_parser import parse_attributes, Attribute
+
+
+@pytest.mark.parametrize(
+    'docstring, expected_attributes, expected_lines_with_attributes',
+    [
+        (
+            '',
+            [],
+            [],
+        ),
+        (
+            """
+            My Class
+
+            :param name: My name
+            :type name: str
+            """,
+            [],
+            [],
+        ),
+        (
+            """
+            My Class
+
+            .. attribute :: attr_1
+                :type: str
+
+            .. attribute :: attr_2
+                :type: bool
+
+                Attr 2
+
+            .. attribute :: attr_3
+
+                Attr 3
+
+            .. attribute :: attr_4
+            .. attribute :: attr_5
+            .. attribute :: attr_6
+                :type: dict | list
+
+                !
+
+            :param name: My name
+            :type name: str
+            """,
+            [
+                Attribute(name='attr_1', type='str', description=None),
+                Attribute(name='attr_2', type='bool', description='Attr 2'),
+                Attribute(name='attr_3', type=None, description='Attr 3'),
+                Attribute(name='attr_4', type=None, description=None),
+                Attribute(name='attr_5', type=None, description=None),
+                Attribute(name='attr_6', type='dict | list', description='!'),
+            ],
+            [3, 4, 6, 7, 9, 11, 13, 15, 16, 17, 18, 20],
+        ),
+        (
+            """
+            My Class
+
+            .. attribute :: attr_1
+                :type: str
+
+            .. attribute :: attr_2
+                :type: bool
+
+                Attr 2
+
+            :param bar: A param called "bar"
+            :type name: float
+
+            .. attribute :: attr_3
+
+                Attr 3
+            :param goo: A param called "goo"
+            :type name: bool
+
+            .. attribute :: attr_4
+            .. attribute :: attr_5
+            :param foo: A param called "foo"
+            :type name: float
+            .. attribute :: attr_6
+                :type: dict | list
+
+                !
+
+            :param name: My name
+            :type name: str
+            """,
+            [
+                Attribute(name='attr_1', type='str', description=None),
+                Attribute(name='attr_2', type='bool', description='Attr 2'),
+                Attribute(name='attr_3', type=None, description='Attr 3'),
+                Attribute(name='attr_4', type=None, description=None),
+                Attribute(name='attr_5', type=None, description=None),
+                Attribute(name='attr_6', type='dict | list', description='!'),
+            ],
+            [3, 4, 6, 7, 9, 14, 16, 20, 21, 24, 25, 27],
+        ),
+    ],
+)
+def test_parser_attributes(
+        docstring: str,
+        expected_attributes: List[Attribute],
+        expected_lines_with_attributes: List[int],
+) -> None:
+    """Check both return values of ``parse_attributes`` for one case."""
+    # The parametrized docstrings are passed as written (not dedented), so
+    # the expected line numbers are 0-based and count the empty first line.
+    attributes, lines_with_attributes = parse_attributes(docstring)
+    assert attributes == expected_attributes
+    assert lines_with_attributes == expected_lines_with_attributes