Skip to content

Commit

Permalink
[fortran] Bug fixes for ingesting TIE-GCM source (#567)
Browse files Browse the repository at this point in the history
## TIE-GCM bug fixes
There were a number of small bugs in the Fortran->Gromet pipeline that
were discovered when determining which TIE-GCM files are currently able
to be ingested.

### Node Helper
- Fixes missing comma in CONTROL_CHARACTERS list causing '>' to be
ignored.
- Adds an additional set of control characters to the CONTROL_CHARACTERS
list that weren't previously seen in any examples.
- Adds remove_comments function to remove comment nodes from tree-sitter
parse tree. This prevents comments from being processed by ts2cast and
creating unexpected None values in the CAST.

### Preprocessor
- Fixes bug in include directory path when running script from directory
other than the system root.
- Runs gcc preprocessor from system root if there are no `#include`
directives.

### TS2CAST
- Fixes bug in function call handler to support ingesting functions with
no arguments.
- Fixes bug in function call handler when the function call is part
of a unary expression `+foo()`
- Fixes bug in conditional handler to support ingesting single line
conditionals
- Adds support for `not` operator

Resolves #563
  • Loading branch information
vincentraymond-ua authored Oct 14, 2023
1 parent 386494b commit 7ae4f25
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 25 deletions.
22 changes: 21 additions & 1 deletion skema/program_analysis/CAST/fortran/node_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,19 @@
"*",
"**",
"/",
"/="
"/=",
">",
"<",
"<=",
">=",
"only",
"\.not\.",
"\.gt\.",
"\.ge\.",
"\.lt\.",
"\.le\.",
"\.eq\.",
"\.ne\.",
]

class NodeHelper():
Expand Down Expand Up @@ -62,6 +69,19 @@ def get_identifier(self, node: Node) -> str:

return identifier

def remove_comments(node: Node):
    """Strip every "comment" child from a parse (sub)tree and return the same node.

    The child list of `node` is edited in place and the function then
    descends into each remaining child, so comments are removed at every
    depth below `node`.

    NOTE: tree-sitter Node objects are read-only, so the node itself is never
    rebuilt; only the list exposed through `node.children` is modified.
    NOTE(review): this assumes `node.children` hands back the same list object
    on every access - confirm for the tree-sitter binding in use.
    """
    # Walk the positions back-to-front so a deletion never shifts an index
    # that still has to be inspected.
    for position in reversed(range(len(node.children))):
        if node.children[position].type == "comment":
            del node.children[position]

    # Recurse into the surviving children; each call returns the child it was
    # given, which is stored back into the same slot.
    for position, child in enumerate(node.children):
        node.children[position] = remove_comments(child)

    return node

def get_first_child_by_type(node: Node, type: str, recurse=False):
"""Takes in a node and a type string as inputs and returns the first child matching that type. Otherwise, return None
When the recurse argument is set, it will also recursively search child nodes.
Expand Down
15 changes: 9 additions & 6 deletions skema/program_analysis/CAST/fortran/preprocessor/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def preprocess(
"""
# NOTE: The order of preprocessing steps does matter. We have to run the GCC preprocessor before correcting the continuation lines or there could be issues

# TODO: Create single location for generating include base path
source = source_path.read_text()

# Get paths for intermediate products
Expand Down Expand Up @@ -66,18 +67,21 @@ def preprocess(

# Step 2: Correct include directives to remove system references
source = fix_include_directives(source)

# Step 3: Process with gcc c-preprocessor
source = run_c_preprocessor(source, source_path.parent)
include_base_directory = Path(source_path.parent, f"include_{source_path.stem}")
if not include_base_directory.exists():
include_base_directory = include_base_directory.parent
source = run_c_preprocessor(source, include_base_directory)
if out_gcc:
gcc_path.write_text(source)

# Step 4: Prepare for tree-sitter
# This step removes any additional preprocessor directives added or not removed by GCC
source = "\n".join(
["!" + line if line.startswith("#") else line for line in source.splitlines()]
)

# Step 5: Check for unsupported idioms
if out_unsupported:
unsupported_path.write_text(
Expand Down Expand Up @@ -181,7 +185,6 @@ def run_c_preprocessor(source: str, include_base_path: Path) -> str:

def convert_to_free_form(source: str) -> str:
"""If fixed-form Fortran source, convert to free-form"""

def validate_parse_tree(source: str) -> bool:
"""Parse source with tree-sitter and check if an error is returned."""
language = Language(INSTALLED_LANGUAGES_FILEPATH, "fortran")
Expand All @@ -201,7 +204,7 @@ def validate_parse_tree(source: str) -> bool:
)
if validate_parse_tree(free_source):
return free_source

return source


Expand Down
62 changes: 44 additions & 18 deletions skema/program_analysis/CAST/fortran/ts2cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from skema.program_analysis.CAST.fortran.variable_context import VariableContext
from skema.program_analysis.CAST.fortran.node_helper import (
NodeHelper,
remove_comments,
get_children_by_types,
get_first_child_by_type,
get_control_children,
Expand Down Expand Up @@ -58,16 +59,19 @@ def __init__(self, source_file_path: str):
)
)
self.tree = parser.parse(bytes(self.source, "utf8"))

self.root_node = remove_comments(self.tree.root_node)
#print(self.root_node.sexp())
# Walking data
self.variable_context = VariableContext()
self.node_helper = NodeHelper(self.source, self.source_file_name)

# Start visiting
self.out_cast = self.generate_cast()
#print(self.out_cast[0].to_json_str())

def generate_cast(self) -> List[CAST]:
'''Interface for generating CAST.'''
modules = self.run(self.tree.root_node)
modules = self.run(self.root_node)
return [CAST([generate_dummy_source_refs(module)], "Fortran") for module in modules]

def run(self, root) -> List[Module]:
Expand All @@ -77,7 +81,6 @@ def run(self, root) -> List[Module]:
# 2. A program body
# 3. Everything else (defined functions)
modules = []

contexts = get_children_by_types(root, ["module", "program"])
for context in contexts:
modules.append(self.visit(context))
Expand All @@ -100,6 +103,7 @@ def run(self, root) -> List[Module]:
source_refs=[self.node_helper.get_source_ref(root)]
))


return modules

def visit(self, node: Node):
Expand Down Expand Up @@ -293,12 +297,15 @@ def visit_function_def(self, node):

def visit_function_call(self, node):
# Pull relevant nodes
if node.type == "subroutine_call":
function_node = node.children[1]
arguments_node = node.children[2]
elif node.type == "call_expression":
function_node = node.children[0]
arguments_node = node.children[1]
# A subroutine and function won't necessarily have an arguments node.
# So we should be careful about trying to access it.
function_node = get_children_by_types(node, ["unary_expression", "subroutine", "identifier",])[0]
arguments_node = get_first_child_by_type(node, "argument_list")

# If this is a unary expression (+foo()) the identifier will be nested.
# TODO: If this is a non '+' unary expression, how do we add it to the CAST?
if function_node.type == "unary_expression":
function_node = get_first_child_by_type(node, "identifier", recurse=True)

function_identifier = self.node_helper.get_identifier(function_node)

Expand All @@ -319,10 +326,11 @@ def visit_function_call(self, node):

# Add arguments to arguments list
arguments = []
for argument in arguments_node.children:
child_cast = self.visit(argument)
if child_cast:
arguments.append(child_cast)
if arguments_node:
for argument in arguments_node.children:
child_cast = self.visit(argument)
if child_cast:
arguments.append(child_cast)

return Call(
func=func,
Expand Down Expand Up @@ -432,7 +440,9 @@ def visit_do_loop_statement(self, node) -> Loop:
if while_statement_node:
return self._visit_while(node)

# The first body node will be the node after the loop_control_expression
# If there is a loop control expression, the first body node will be the node after the loop_control_expression
# It is valid Fortran to have a single iteration do loop as well.
# TODO: Add support for single iteration do-loop
# NOTE: This code is for the creation of the main body. The do loop will still add some additional nodes at the end of this body.
body = []
body_start_index = 1 + get_first_child_index(node, "loop_control_expression")
Expand Down Expand Up @@ -575,6 +585,15 @@ def visit_if_statement(self, node):
else:
body_stop_index = else_index

# Single line if conditions don't have a 'then' or 'end if' clause.
# So the starting index for the body can either be 2 or 3.
then_index = get_first_child_index(node, "then")
if then_index:
body_start_index = then_index+1
else:
body_start_index = 2
body_stop_index = len(node.children)

prev = None
orelse = None
# If there are else_if statements, they need
Expand All @@ -586,7 +605,7 @@ def visit_if_statement(self, node):
continue
elseif_expr = self.visit(condition.children[2])
elseif_body = [self.visit(child) for child in condition.children[4:]]

prev.orelse = ModelIf(elseif_expr, elseif_body, [])
prev = prev.orelse

Expand All @@ -599,17 +618,25 @@ def visit_if_statement(self, node):
else:
orelse = else_body

# TODO: This orelse logic has gotten a little complex, we might want to refactor this.
if isinstance(orelse, ModelIf):
orelse = orelse.orelse
if orelse:
if isinstance(orelse, ModelIf):
orelse = [orelse]

return ModelIf(
expr=self.visit(node.children[1]),
body=[self.visit(child) for child in node.children[3:body_stop_index]],
orelse=[orelse] if orelse else [],
body=[self.visit(child) for child in node.children[body_start_index:body_stop_index]],
orelse=orelse if orelse else [],
)

def visit_logical_expression(self, node):
"""Visitior for logical expression (i.e. true and false) which is used in compound conditional"""
# If this is a .not. operator, we need to pass it on to the math_expression visitor
if len(node.children) < 3:
return self.visit_math_expression(node)

literal_value_false = LiteralValue("Boolean", False)
literal_value_true = LiteralValue("Boolean", True)

Expand Down Expand Up @@ -733,7 +760,6 @@ def visit_math_expression(self, node):
op = self.node_helper.get_identifier(
get_control_children(node)[0]
) # The operator will be the first control character

operands = []
for operand in get_non_control_children(node):
operands.append(self.visit(operand))
Expand Down

0 comments on commit 7ae4f25

Please sign in to comment.