Support FunctionDef transformations (#610)

I've tested all of the edge cases I know of: type comments in various locations, non-type-comments, arity mismatches where we should skip, etc. Assuming that all type comments parse, this should work as far as I know. I'll make a separate PR to deal with SyntaxErrors when parsing types, because that is cross-cutting and not specific to FunctionDef.
Instagram · Jan 18, 2022 · 9563b4a · 9563b4a
1 parent 0c509b3
commit 9563b4a
Show file tree

Hide file tree

Showing 2 changed files with 411 additions and 10 deletions.
diff --git a/libcst/codemod/commands/convert_type_comments.py b/libcst/codemod/commands/convert_type_comments.py
@@ -5,9 +5,10 @@
 
 import ast
 import builtins
+import dataclasses
 import functools
 import sys
-from typing import List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
 
 from typing_extensions import TypeAlias
 
@@ -58,6 +59,25 @@ def _convert_annotation(raw: str) -> cst.Annotation:
         return cst.Annotation(annotation=cst.SimpleString(f'"{raw}"'))
 
 
+def _is_type_comment(comment: Optional[cst.Comment]) -> bool:
+    """
+    Determine whether a comment is a type comment.
+
+    Unfortunately, to strip type comments in a location-invariant way requires
+    finding them from pure libcst data. We only use this in function defs, where
+    the precise cst location of the type comment can be hard to predict.
+    """
+    if comment is None:
+        return False
+    value = comment.value[1:].strip()
+    if not value.startswith("type:"):
+        return False
+    suffix = value.removeprefix("type:").strip().split()
+    if len(suffix) > 0 and suffix[0] == "ignore":
+        return False
+    return True
+
+
 class _FailedToApplyAnnotation:
     pass
 
@@ -228,17 +248,144 @@ def convert_Assign(
         ]
 
 
+@dataclasses.dataclass(frozen=True)
+class FunctionTypeInfo:
+    arguments: Dict[str, Optional[str]]
+    returns: Optional[str]
+
+    def is_empty(self) -> bool:
+        return self.returns is None and self.arguments == {}
+
+    @classmethod
+    def from_cst(
+        cls,
+        node_cst: cst.FunctionDef,
+    ) -> "FunctionTypeInfo":
+        """
+        Using the `ast` type comment extraction logic, get type information
+        for a function definition.
+
+        To understand edge case behavior see the `ConvertTypeComments` docstring.
+        """
+        # pyre-ignore[33]: ast doesn't have complete stubs
+        node_ast: Any = ast.parse(_code_for_node(node_cst), type_comments=True).body[0]
+        # Note: this is guaranteed to have the correct arity.
+        args = [
+            *node_ast.args.posonlyargs,
+            *node_ast.args.args,
+            *(
+                []
+                if node_ast.args.vararg is None
+                else [
+                    node_ast.args.vararg,
+                ]
+            ),
+            *node_ast.args.kwonlyargs,
+            *(
+                []
+                if node_ast.args.kwarg is None
+                else [
+                    node_ast.args.kwarg,
+                ]
+            ),
+        ]
+        function_type_comment = node_ast.type_comment
+        if function_type_comment is None:
+            return cls(
+                arguments={arg.arg: arg.type_comment for arg in args},
+                returns=None,
+            )
+        else:
+            # pyre-ignore[33]: ast doesn't have complete stubs
+            function_type_ast: Any = ast.parse(
+                node_ast.type_comment,
+                "<type_comment>",
+                mode="func_type",
+            )
+            argtypes = function_type_ast.argtypes
+            returns = ast.unparse(function_type_ast.returns)
+            if (
+                len(argtypes) == 1
+                and isinstance(argtypes[0], ast.Constant)
+                and argtypes[0].value is Ellipsis
+            ):
+                # Only use the return type if the comment was like `(...) -> R`
+                return cls(
+                    arguments={arg.arg: arg.type_comment for arg in args},
+                    returns=returns,
+                )
+            elif len(argtypes) == len(args):
+                # Merge the type comments, preferring inline comments where available
+                return cls(
+                    arguments={
+                        arg.arg: arg.type_comment or ast.unparse(from_func_type)
+                        for arg, from_func_type in zip(args, argtypes)
+                    },
+                    returns=returns,
+                )
+            else:
+                # On arity mismatches, ignore the type information
+                return cls({}, None)
+
+
 class ConvertTypeComments(VisitorBasedCodemodCommand):
     """
-    Codemod that converts type comments, as described in
-    https://www.python.org/dev/peps/pep-0484/#type-comments,
-    into PEP 526 annotated assignments.
-
-    This is a work in progress: we intend to also support
-    function type comments, with statements, and for statements
-    but those are not yet implemented.
+    Codemod that converts type comments into Python 3.6+ style
+    annotations.
+
+    We can handle type comments in the following statement types:
+    - Assign
+      - This is converted into a single AnnAssign when possible
+      - In more complicated cases it will produce multiple AnnAssign
+        nodes with no value (i.e. "type declaration" statements)
+        followed by an Assign
+    - For and With
+      - We prepend both of these with type declaration statements.
+    - FunctionDef
+      - We apply all the types we can find. If we find several:
+        - We prefer any existing annotations to type comments
+        - For parameters, we prefer inline type comments to
+          function-level type comments if we find both.
+
+    We always apply the type comments as quoted annotations, unless
+    we know that it refers to a builtin. We do not guarantee that
+    the resulting string annotations would parse, but they should
+    never cause failures at module import time.
+
+    We attempt to:
+    - Always strip type comments for statements where we successfully
+      applied types.
+    - Never strip type comments for statements where we failed to
+      apply types.
+
+    There are many edge cases possible where the arity of a type
+    hint (which is either a tuple or a func_type) might not match
+    the code. In these cases we generally give up:
+    - For Assign, For, and With, we require that every target of
+      bindings (e.g. a tuple of names being bound) must have exactly
+      the same arity as the comment.
+      - So, for example, we would skip an assignment statement such as
+        ``x = y, z = 1, 2  # type: int, int`` because the arity
+        of ``x`` does not match the arity of the hint.
+    - For FunctionDef, we do *not* check arity of inline parameter
+      type comments but we do skip the transform if the arity of
+      the function does not match the function-level comment.
     """
 
+    # Finding the location of a type comment in a FunctionDef is difficult.
+    #
+    # As a result, if we are able to successfully extract type information
+    # when visiting a FunctionDef header, then we aggressively strip type
+    # comments until we reach the first statement in the body.
+    #
+    # Once we get there we have to stop, so that we don't unintentionally remove
+    # unprocessed type comments.
+    #
+    # This state handles tracking everything we need for this.
+    function_type_info_stack: List[FunctionTypeInfo]
+    function_body_stack: List[cst.BaseSuite]
+    aggressively_strip_type_comments: bool
+
     def __init__(self, context: CodemodContext) -> None:
         if (sys.version_info.major, sys.version_info.minor) < (3, 9):
             # The ast module did not get `unparse` until Python 3.9,
@@ -256,6 +403,9 @@ def __init__(self, context: CodemodContext) -> None:
                 + "it is only libcst that needs a new Python version."
             )
         super().__init__(context)
+        self.function_type_info_stack = []
+        self.function_body_stack = []
+        self.aggressively_strip_type_comments = False
 
     def _strip_TrailingWhitespace(
         self,
@@ -430,3 +580,143 @@ def leave_With(
                 ),
             ]
         )
+
+    # Handle function definitions -------------------------
+
+    # **Implementation Notes**
+    #
+    # It is much harder to predict where exactly type comments will live
+    # in function definitions than in Assign / For / With.
+    #
+    # As a result, we use two different patterns:
+    # (A) we aggressively strip out type comments from whitespace between the
+    #     start of a function definition and the start of the body, whenever we
+    #     were able to extract type information. This is done via mutable state
+    #     and the usual visitor pattern.
+    # (B) we also manually reach down to the first statement inside of the
+    #     function body and aggressively strip type comments from its leading
+    #     lines.
+
+    def visit_FunctionDef(
+        self,
+        node: cst.FunctionDef,
+    ) -> None:
+        """
+        Set up the data we need to handle function definitions:
+        - Parse the type comments.
+        - Store the resulting function type info on the stack, where it will
+          remain until we use it in `leave_FunctionDef`
+        - Set that we are aggressively stripping type comments, which will
+          remain true until we visit the body.
+        """
+        function_type_info = FunctionTypeInfo.from_cst(node)
+        self.aggressively_strip_type_comments = not function_type_info.is_empty()
+        self.function_type_info_stack.append(function_type_info)
+        self.function_body_stack.append(node.body)
+
+    def leave_TrailingWhitespace(
+        self,
+        original_node: cst.TrailingWhitespace,
+        updated_node: cst.TrailingWhitespace,
+    ) -> Union[cst.TrailingWhitespace]:
+        "Aggressively remove type comments when in header if we extracted types."
+        if self.aggressively_strip_type_comments and _is_type_comment(
+            updated_node.comment
+        ):
+            return cst.TrailingWhitespace()
+        else:
+            return updated_node
+
+    def leave_EmptyLine(
+        self,
+        original_node: cst.EmptyLine,
+        updated_node: cst.EmptyLine,
+    ) -> Union[cst.EmptyLine, cst.RemovalSentinel]:
+        "Aggressively remove type comments when in header if we extracted types."
+        if self.aggressively_strip_type_comments and _is_type_comment(
+            updated_node.comment
+        ):
+            return cst.RemovalSentinel.REMOVE
+        else:
+            return updated_node
+
+    def visit_FunctionDef_body(
+        self,
+        node: cst.FunctionDef,
+    ) -> None:
+        "Turn off aggressive type comment removal once we've left the header."
+        self.aggressively_strip_type_comments = False
+
+    def leave_IndentedBlock(
+        self,
+        original_node: cst.IndentedBlock,
+        updated_node: cst.IndentedBlock,
+    ) -> cst.IndentedBlock:
+        "When appropriate, strip function type comment from the function body."
+        # abort unless this is the body of a function we are transforming
+        if len(self.function_body_stack) == 0:
+            return updated_node
+        if original_node is not self.function_body_stack[-1]:
+            return updated_node
+        if self.function_type_info_stack[-1].is_empty():
+            return updated_node
+        # The comment will be in the body header if it was on the same line
+        # as the colon.
+        if _is_type_comment(updated_node.header.comment):
+            updated_node = updated_node.with_changes(
+                header=cst.TrailingWhitespace(),
+            )
+        # The comment will be in a leading line of the first body statement
+        # if it was on the first line after the colon.
+        first_statement = updated_node.body[0]
+        if not hasattr(first_statement, "leading_lines"):
+            return updated_node
+        return updated_node.with_changes(
+            body=[
+                first_statement.with_changes(
+                    leading_lines=[
+                        line
+                        # pyre-ignore[16]: we refined via `hasattr`
+                        for line in first_statement.leading_lines
+                        if not _is_type_comment(line.comment)
+                    ]
+                ),
+                *updated_node.body[1:],
+            ]
+        )
+
+    # Methods for adding type annotations ----
+    #
+    # By the time we get here, all type comments should already be stripped.
+
+    def leave_Param(
+        self,
+        original_node: cst.Param,
+        updated_node: cst.Param,
+    ) -> cst.Param:
+        # ignore type comments if there's already an annotation
+        if updated_node.annotation is not None:
+            return updated_node
+        # find out if there's a type comment and apply it if so
+        function_type_info = self.function_type_info_stack[-1]
+        raw_annotation = function_type_info.arguments.get(updated_node.name.value)
+        if raw_annotation is not None:
+            return updated_node.with_changes(
+                annotation=_convert_annotation(raw=raw_annotation)
+            )
+        else:
+            return updated_node
+
+    def leave_FunctionDef(
+        self,
+        original_node: cst.FunctionDef,
+        updated_node: cst.FunctionDef,
+    ) -> cst.FunctionDef:
+        self.function_body_stack.pop()
+        function_type_info = self.function_type_info_stack.pop()
+        if updated_node.returns is None and function_type_info.returns is not None:
+            return updated_node.with_changes(
+                returns=_convert_annotation(raw=function_type_info.returns)
+            )
+        else:
+            return updated_node