Skip to content

Commit

Permalink
Support FunctionDef transformations (#610)
Browse files Browse the repository at this point in the history
I've tested all of the edge cases I know of: type comments in various
locations, non-type-comments, arity mismatches where we should skip,
etc.

Assuming that all type comments parse, this should work as far as I
know. I'll make a separate PR to deal with SyntaxErrors when parsing
types, because that is cross-cutting and not specific to FunctionDef.
  • Loading branch information
stroxler authored Jan 18, 2022
1 parent 0c509b3 commit 9563b4a
Show file tree
Hide file tree
Showing 2 changed files with 411 additions and 10 deletions.
306 changes: 298 additions & 8 deletions libcst/codemod/commands/convert_type_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import ast
import builtins
import dataclasses
import functools
import sys
from typing import List, Optional, Sequence, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union

from typing_extensions import TypeAlias

Expand Down Expand Up @@ -58,6 +59,25 @@ def _convert_annotation(raw: str) -> cst.Annotation:
return cst.Annotation(annotation=cst.SimpleString(f'"{raw}"'))


def _is_type_comment(comment: Optional[cst.Comment]) -> bool:
    """
    Determine whether a comment is a type comment.

    Unfortunately, to strip type comments in a location-invariant way requires
    finding them from pure libcst data. We only use this in function defs, where
    the precise cst location of the type comment can be hard to predict.

    Returns False for a missing comment, for any comment whose text does not
    start with ``type:``, and for every spelling of ``# type: ignore``
    (including ``# type: ignore[code]`` and ``# type: ignore  # reason``),
    so that ignore directives are never mistaken for strippable type comments.
    """
    if comment is None:
        return False
    # Drop the leading `#` and any padding around the comment text.
    value = comment.value[1:].strip()
    if not value.startswith("type:"):
        return False
    payload = value[len("type:") :].lstrip()
    if payload.startswith("ignore"):
        # `ignore` marks a type-ignore unless it is merely the prefix of a
        # longer word (e.g. a type named `ignored`): it must be followed by
        # nothing, or by an ASCII character that is not alphanumeric.
        following = payload[len("ignore") : len("ignore") + 1]
        if not following or (following.isascii() and not following.isalnum()):
            return False
    return True


class _FailedToApplyAnnotation:
    """
    Empty marker class: it defines no attributes and no behavior of its own.

    NOTE(review): presumably used as a sentinel meaning "the annotation could
    not be applied"; its call sites are outside this view -- confirm.
    """

Expand Down Expand Up @@ -228,17 +248,144 @@ def convert_Assign(
]


@dataclasses.dataclass(frozen=True)
class FunctionTypeInfo:
    """
    Raw type strings recovered from the type comments of one function.
    """

    # Parameter name -> raw type string, or None when no type comment was
    # found for that parameter.
    arguments: Dict[str, Optional[str]]
    # Raw return type taken from a function-level type comment, if any.
    returns: Optional[str]

    def is_empty(self) -> bool:
        """
        Return True when no type information at all was extracted.

        A parameter that is present but maps to None carries no information,
        so a function with parameters and no type comments counts as empty.
        """
        if self.returns is not None:
            return False
        return all(raw_type is None for raw_type in self.arguments.values())

    @classmethod
    def from_cst(
        cls,
        node_cst: cst.FunctionDef,
    ) -> "FunctionTypeInfo":
        """
        Using the `ast` type comment extraction logic, get type information
        for a function definition.

        Inline (per-parameter) type comments win over the function-level
        comment. A function-level comment of the form `(...) -> R` only
        contributes the return type. If the function-level comment lists a
        different number of argument types than the function has parameters,
        all type information is discarded. See the `ConvertTypeComments`
        docstring for the user-facing description of these edge cases.
        """
        # pyre-ignore[33]: ast doesn't have complete stubs
        node_ast: Any = ast.parse(_code_for_node(node_cst), type_comments=True).body[0]
        # Note: this is guaranteed to have the correct arity.
        args = [
            *node_ast.args.posonlyargs,
            *node_ast.args.args,
            *([] if node_ast.args.vararg is None else [node_ast.args.vararg]),
            *node_ast.args.kwonlyargs,
            *([] if node_ast.args.kwarg is None else [node_ast.args.kwarg]),
        ]
        function_type_comment = node_ast.type_comment
        if function_type_comment is None:
            # Only inline parameter comments (if any) are available.
            return cls(
                arguments={arg.arg: arg.type_comment for arg in args},
                returns=None,
            )
        # pyre-ignore[33]: ast doesn't have complete stubs
        function_type_ast: Any = ast.parse(
            function_type_comment,
            "<type_comment>",
            mode="func_type",
        )
        argtypes = function_type_ast.argtypes
        returns = ast.unparse(function_type_ast.returns)
        if (
            len(argtypes) == 1
            and isinstance(argtypes[0], ast.Constant)
            and argtypes[0].value is Ellipsis
        ):
            # Only use the return type if the comment was like `(...) -> R`
            return cls(
                arguments={arg.arg: arg.type_comment for arg in args},
                returns=returns,
            )
        if len(argtypes) == len(args):
            # Merge the type comments, preferring inline comments where available
            return cls(
                arguments={
                    arg.arg: arg.type_comment or ast.unparse(from_func_type)
                    for arg, from_func_type in zip(args, argtypes)
                },
                returns=returns,
            )
        # On arity mismatches, ignore the type information
        return cls({}, None)


class ConvertTypeComments(VisitorBasedCodemodCommand):
"""
Codemod that converts type comments, as described in
https://www.python.org/dev/peps/pep-0484/#type-comments,
into PEP 526 annotated assignments.
This is a work in progress: we intend to also support
function type comments, with statements, and for statements
but those are not yet implemented.
Codemod that converts type comments into Python 3.6+ style
annotations.
We can handle type comments in the following statement types:
- Assign
- This is converted into a single AnnAssign when possible
- In more complicated cases it will produce multiple AnnAssign
nodes with no value (i.e. "type declaration" statements)
followed by an Assign
- For and With
- We prepend both of these with type declaration statements.
- FunctionDef
- We apply all the types we can find. If we find several:
- We prefer any existing annotations to type comments
- For parameters, we prefer inline type comments to
function-level type comments if we find both.
We always apply the type comments as quoted annotations, unless
we know that it refers to a builtin. We do not guarantee that
the resulting string annotations would parse, but they should
never cause failures at module import time.
We attempt to:
- Always strip type comments for statements where we successfully
applied types.
- Never strip type comments for statements where we failed to
apply types.
There are many possible edge cases where the arity of a type
hint (which is either a tuple or a func_type) might not match
the code. In these cases we generally give up:
- For Assign, For, and With, we require that every target of
bindings (e.g. a tuple of names being bound) must have exactly
the same arity as the comment.
- So, for example, we would skip an assignment statement such as
``x = y, z = 1, 2 # type: int, int`` because the arity
of ``x`` does not match the arity of the hint.
- For FunctionDef, we do *not* check arity of inline parameter
type comments but we do skip the transform if the arity of
the function does not match the function-level comment.
"""

# Finding the location of a type comment in a FunctionDef is difficult.
#
# As a result, if when visiting a FunctionDef header we are able to
# successfully extract type information then we aggressively strip type
# comments until we reach the first statement in the body.
#
# Once we get there we have to stop, so that we don't unintentionally remove
# unprocessed type comments.
#
# This state handles tracking everything we need for this.
function_type_info_stack: List[FunctionTypeInfo]
function_body_stack: List[cst.BaseSuite]
aggressively_strip_type_comments: bool

def __init__(self, context: CodemodContext) -> None:
if (sys.version_info.major, sys.version_info.minor) < (3, 9):
# The ast module did not get `unparse` until Python 3.9,
Expand All @@ -256,6 +403,9 @@ def __init__(self, context: CodemodContext) -> None:
+ "it is only libcst that needs a new Python version."
)
super().__init__(context)
self.function_type_info_stack = []
self.function_body_stack = []
self.aggressively_strip_type_comments = False

def _strip_TrailingWhitespace(
self,
Expand Down Expand Up @@ -430,3 +580,143 @@ def leave_With(
),
]
)

# Handle function definitions -------------------------

# **Implementation Notes**
#
# It is much harder to predict where exactly type comments will live
# in function definitions than in Assign / For / With.
#
# As a result, we use two different patterns:
# (A) we aggressively strip out type comments from whitespace between the
# start of a function definition and the start of the body, whenever we
# were able to extract type information. This is done via mutable state
# and the usual visitor pattern.
# (B) we also manually reach down to the first statement inside of the
# function body and aggressively strip type comments from its leading
# whitespace.

def visit_FunctionDef(
    self,
    node: cst.FunctionDef,
) -> None:
    """
    Record everything needed to transform this function definition.

    - Extract type information from the function's type comments.
    - Push that information, and the function's body node, onto their
      stacks; both are consumed later while leaving the function.
    - Switch aggressive type-comment stripping on exactly when the
      extracted information is non-empty. It stays in that state until
      the function body is visited.
    """
    extracted = FunctionTypeInfo.from_cst(node)
    self.function_type_info_stack.append(extracted)
    self.function_body_stack.append(node.body)
    # Only strip comments in the header if we actually have types to apply.
    self.aggressively_strip_type_comments = not extracted.is_empty()

def leave_TrailingWhitespace(
    self,
    original_node: cst.TrailingWhitespace,
    updated_node: cst.TrailingWhitespace,
) -> Union[cst.TrailingWhitespace]:
    """
    Drop a trailing type comment while aggressive stripping is switched on.

    When stripping is active and the node's comment is a type comment, a
    fresh default `TrailingWhitespace` replaces the node; otherwise the
    node is returned untouched.
    """
    should_strip = self.aggressively_strip_type_comments and _is_type_comment(
        updated_node.comment
    )
    return cst.TrailingWhitespace() if should_strip else updated_node

def leave_EmptyLine(
    self,
    original_node: cst.EmptyLine,
    updated_node: cst.EmptyLine,
) -> Union[cst.EmptyLine, cst.RemovalSentinel]:
    """
    Remove a comment-only line while aggressive stripping is switched on.

    When stripping is active and the line's comment is a type comment, the
    whole line is removed; otherwise the line is returned untouched.
    """
    should_remove = self.aggressively_strip_type_comments and _is_type_comment(
        updated_node.comment
    )
    if should_remove:
        return cst.RemovalSentinel.REMOVE
    return updated_node

def visit_FunctionDef_body(
    self,
    node: cst.FunctionDef,
) -> None:
    """
    Switch aggressive type-comment stripping off once the header is behind us.

    Runs when traversal reaches the function's body, so comments inside the
    body are no longer removed by the whitespace visitors.
    """
    self.aggressively_strip_type_comments = False

def leave_IndentedBlock(
    self,
    original_node: cst.IndentedBlock,
    updated_node: cst.IndentedBlock,
) -> cst.IndentedBlock:
    """
    Strip the function-level type comment from a function body, if any.

    Nothing is changed unless this block is the body of the innermost
    function being tracked and type information was extracted for that
    function. The comment is looked for in two places: the block header
    (comment on the same line as the colon) and the leading lines of the
    first statement (comment on the first line after the colon).
    """
    is_tracked_body = (
        len(self.function_body_stack) > 0
        and original_node is self.function_body_stack[-1]
        and not self.function_type_info_stack[-1].is_empty()
    )
    if not is_tracked_body:
        return updated_node

    block = updated_node
    # Same-line case: the comment sits in the header's trailing whitespace.
    if _is_type_comment(block.header.comment):
        block = block.with_changes(header=cst.TrailingWhitespace())

    # Next-line case: the comment is a leading line of the first statement.
    first_statement = block.body[0]
    if hasattr(first_statement, "leading_lines"):
        kept_lines = [
            line
            # pyre-ignore[16]: we refined via `hasattr`
            for line in first_statement.leading_lines
            if not _is_type_comment(line.comment)
        ]
        cleaned_first = first_statement.with_changes(leading_lines=kept_lines)
        return block.with_changes(body=[cleaned_first, *block.body[1:]])
    return block

# Methods for adding type annotations ----
#
# By the time we get here, all type comments should already be stripped.

def leave_Param(
    self,
    original_node: cst.Param,
    updated_node: cst.Param,
) -> cst.Param:
    """
    Annotate a parameter from the type comments of the enclosing function.

    The parameter is returned unchanged when it already has an annotation,
    when it is not inside any function definition being tracked, or when
    no type was extracted for its name.
    """
    # ignore type comments if there's already an annotation
    if updated_node.annotation is not None:
        return updated_node
    # Params also occur outside of function definitions (lambda parameters
    # are `Param` nodes too); with no enclosing function there is no type
    # info to consult, and indexing the empty stack would raise IndexError.
    # NOTE(review): a lambda nested *inside* a function still consults that
    # function's type info by parameter name -- confirm whether lambdas
    # should be skipped entirely.
    if not self.function_type_info_stack:
        return updated_node
    # find out if there's a type comment and apply it if so
    function_type_info = self.function_type_info_stack[-1]
    raw_annotation = function_type_info.arguments.get(updated_node.name.value)
    if raw_annotation is None:
        return updated_node
    return updated_node.with_changes(
        annotation=_convert_annotation(raw=raw_annotation)
    )

def leave_FunctionDef(
    self,
    original_node: cst.FunctionDef,
    updated_node: cst.FunctionDef,
) -> cst.FunctionDef:
    """
    Finish a function: pop its tracking state and add a return annotation.

    The body and type-info entries pushed for this function are always
    popped. A return annotation is added only when the function has none
    and a return type was extracted from its type comment.
    """
    self.function_body_stack.pop()
    raw_return = self.function_type_info_stack.pop().returns
    # An existing annotation always wins over the type comment.
    if updated_node.returns is not None or raw_return is None:
        return updated_node
    return updated_node.with_changes(returns=_convert_annotation(raw=raw_return))
Loading

0 comments on commit 9563b4a

Please sign in to comment.