From ffd8060b30d8fa0fed69690d92826cdaf0ff7cc8 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Sat, 9 Mar 2024 20:50:37 -0500 Subject: [PATCH 1/2] refactor: split out modules for separate concerns * organize the codebase into modules * use absolute instead of relative imports * no need for classes to inherit from object * use consistent camel case for class names * remove imports from __future__ from test script (not needed) * run black and isort on python files * add .DS_Store to .gitignore --- .gitignore | 3 + README.md | 1 + fprettify/__init__.py | 2102 ++--------------------------------- fprettify/aligner.py | 180 +++ fprettify/constants.py | 19 + fprettify/exceptions.py | 22 + fprettify/formatter.py | 1140 +++++++++++++++++++ fprettify/fparse_utils.py | 305 ----- fprettify/indenter.py | 217 ++++ fprettify/parser.py | 1140 +++++++++++++++++++ fprettify/preprocessor.py | 73 ++ fprettify/tests/__init__.py | 656 ++++++----- fprettify/utils.py | 372 +++++++ 13 files changed, 3623 insertions(+), 2607 deletions(-) create mode 100644 fprettify/aligner.py create mode 100644 fprettify/constants.py create mode 100644 fprettify/exceptions.py create mode 100644 fprettify/formatter.py delete mode 100644 fprettify/fparse_utils.py create mode 100644 fprettify/indenter.py create mode 100644 fprettify/parser.py create mode 100644 fprettify/preprocessor.py create mode 100644 fprettify/utils.py diff --git a/.gitignore b/.gitignore index 672366a..f9889f8 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,6 @@ _version.py # fortran temporary test files fortran_tests/ + +# mac files +**.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index a1ea7f8..b2bccf6 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![CI](https://github.com/pseewald/fprettify/actions/workflows/test.yml/badge.svg)](https://github.com/pseewald/fprettify/actions/workflows/test.yml) [![Coverage Status](https://coveralls.io/repos/github/pseewald/fprettify/badge.svg?branch=master)](https://coveralls.io/github/pseewald/fprettify?branch=master) ![PyPI - License](https://img.shields.io/pypi/l/fprettify) +![PyPI - Versions](https://img.shields.io/pypi/pyversions/fprettify) ![PyPI](https://img.shields.io/pypi/v/fprettify) [![Code Climate](https://codeclimate.com/github/pseewald/fprettify/badges/gpa.svg)](https://codeclimate.com/github/pseewald/fprettify) diff --git a/fprettify/__init__.py b/fprettify/__init__.py index d6450a3..8812d1e 100644 --- a/fprettify/__init__.py +++ b/fprettify/__init__.py @@ -65,2002 +65,54 @@ whitespaces - open files only when needed """ -import re -import sys +import io import logging import os -import io +import sys -sys.stdin = io.TextIOWrapper( - sys.stdin.detach(), encoding='UTF-8', line_buffering=True) +from fprettify.constants import * +from fprettify.exceptions import * +from fprettify.formatter import reformat_inplace +from fprettify.utils import ( + build_ws_dict, + get_arg_parser, + get_config_files, + get_parser_args, + log_exception, + set_logger, +) + +sys.stdin = io.TextIOWrapper(sys.stdin.detach(), encoding="UTF-8", line_buffering=True) sys.stdout = io.TextIOWrapper( - sys.stdout.detach(), encoding='UTF-8', line_buffering=True) - - -from .fparse_utils import (VAR_DECL_RE, OMP_COND_RE, OMP_DIR_RE, - InputStream, CharFilter, - FprettifyException, FprettifyParseException, FprettifyInternalException, - CPP_RE, NOTFORTRAN_LINE_RE, NOTFORTRAN_FYPP_LINE_RE, FYPP_LINE_RE, RE_FLAGS, - STR_OPEN_RE, parser_re, FYPP_WITHOUT_PREPRO_RE) - -# recognize fortran files by extension -FORTRAN_EXTENSIONS = [".f", ".for", ".ftn", - ".f90", ".f95", ".f03", ".fpp"] -FORTRAN_EXTENSIONS += [_.upper() for _ in FORTRAN_EXTENSIONS] - -# constants, mostly regular expressions: -FORMATTER_ERROR_MESSAGE = (" Wrong usage of formatting-specific directives" - " '&', '!&', '!&<' or '!&>'.") -LINESPLIT_MESSAGE = ("auto indentation failed due to chars limit, " - "line should be split") - -EOL_STR = r"\s*;?\s*$" # end of fortran line -EOL_SC = r"\s*;\s*$" # whether line is ended with semicolon -SOL_STR = r"^\s*" # start of fortran line - -STATEMENT_LABEL_RE = re.compile(r"^\s*(\d+\s)(?!"+EOL_STR+")", RE_FLAGS) - -# regular expressions for parsing statements that start, continue or end a -# subunit: -IF_RE = re.compile( - SOL_STR + r"(\w+\s*:)?\s*IF\s*\(.*\)\s*THEN" + EOL_STR, RE_FLAGS) -ELSE_RE = re.compile( - SOL_STR + r"ELSE(\s*IF\s*\(.*\)\s*THEN)?" + EOL_STR, RE_FLAGS) -ENDIF_RE = re.compile(SOL_STR + r"END\s*IF(\s+\w+)?" + EOL_STR, RE_FLAGS) - -DO_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*DO(" + EOL_STR + r"|\s+\w)", RE_FLAGS) -ENDDO_RE = re.compile(SOL_STR + r"END\s*DO(\s+\w+)?" + EOL_STR, RE_FLAGS) - -SELCASE_RE = re.compile( - SOL_STR + r"SELECT\s*(CASE|RANK|TYPE)\s*\(.*\)" + EOL_STR, RE_FLAGS) -CASE_RE = re.compile( - SOL_STR + r"((CASE|RANK|TYPE\s+IS|CLASS\s+IS)\s*(\(.*\)|DEFAULT)|CLASS\s+DEFAULT)" + EOL_STR, RE_FLAGS) -ENDSEL_RE = re.compile(SOL_STR + r"END\s*SELECT" + EOL_STR, RE_FLAGS) - -ASSOCIATE_RE = re.compile(SOL_STR + r"ASSOCIATE\s*\(.*\)" + EOL_STR, RE_FLAGS) -ENDASSOCIATE_RE = re.compile(SOL_STR + r"END\s*ASSOCIATE" + EOL_STR, RE_FLAGS) - -BLK_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*BLOCK" + EOL_STR, RE_FLAGS) -ENDBLK_RE = re.compile(SOL_STR + r"END\s*BLOCK(\s+\w+)?" + EOL_STR, RE_FLAGS) - -SUBR_RE = re.compile( - r"^([^\"']* )?SUBROUTINE\s+\w+\s*(\(.*\))?" + EOL_STR, RE_FLAGS) -ENDSUBR_RE = re.compile( - SOL_STR + r"END\s*SUBROUTINE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -FCT_RE = re.compile( - r"^([^\"']* )?FUNCTION\s+\w+\s*(\(.*\))?(\s*RESULT\s*\(\w+\))?" + EOL_STR, - RE_FLAGS) -ENDFCT_RE = re.compile( - SOL_STR + r"END\s*FUNCTION(\s+\w+)?" + EOL_STR, RE_FLAGS) - -MOD_RE = re.compile(SOL_STR + r"MODULE\s+\w+" + EOL_STR, RE_FLAGS) -ENDMOD_RE = re.compile(SOL_STR + r"END\s*MODULE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -SMOD_RE = re.compile(SOL_STR + r"SUBMODULE\s*\(\w+\)\s+\w+" + EOL_STR, RE_FLAGS) -ENDSMOD_RE = re.compile(SOL_STR + r"END\s*SUBMODULE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -TYPE_RE = re.compile( - SOL_STR + - r"TYPE(\s*,\s*(BIND\s*\(\s*C\s*\)|EXTENDS\s*\(.*\)|ABSTRACT|PUBLIC|PRIVATE))*(\s*,\s*)?(\s*::\s*|\s+)\w+" + EOL_STR, - RE_FLAGS) -ENDTYPE_RE = re.compile(SOL_STR + r"END\s*TYPE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -PROG_RE = re.compile(SOL_STR + r"PROGRAM\s+\w+" + EOL_STR, RE_FLAGS) -ENDPROG_RE = re.compile( - SOL_STR + r"END\s*PROGRAM(\s+\w+)?" + EOL_STR, RE_FLAGS) - -INTERFACE_RE = re.compile( - r"^([^\"']* )?INTERFACE(\s+\w+|\s+(OPERATOR|ASSIGNMENT)\s*\(.*\))?" + EOL_STR, RE_FLAGS) -ENDINTERFACE_RE = re.compile( - SOL_STR + r"END\s*INTERFACE(\s+\w+|\s+(OPERATOR|ASSIGNMENT)\s*\(.*\))?" + EOL_STR, RE_FLAGS) - -CONTAINS_RE = re.compile(SOL_STR + r"CONTAINS" + EOL_STR, RE_FLAGS) - -ENUM_RE = re.compile( - SOL_STR + r"ENUM(\s*,\s*(BIND\s*\(\s*C\s*\)))?((\s*::\s*|\s+)\w+)?" + EOL_STR, - RE_FLAGS) -ENDENUM_RE = re.compile(SOL_STR + r"END\s*ENUM(\s+\w+)?" + EOL_STR, RE_FLAGS) - -ENDANY_RE = re.compile(SOL_STR + r"END" + EOL_STR, RE_FLAGS) - -# Regular expressions for where and forall block constructs -FORALL_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*FORALL\s*\(.*\)" + EOL_STR, RE_FLAGS) -ENDFORALL_RE = re.compile(SOL_STR + r"END\s*FORALL(\s+\w+)?" + EOL_STR, RE_FLAGS) - -WHERE_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*WHERE\s*\(.*\)" + EOL_STR, RE_FLAGS) -ELSEWHERE_RE = re.compile(SOL_STR + r"ELSE\s*WHERE(\(.*\))?(\s*\w+)?" + EOL_STR, RE_FLAGS) -ENDWHERE_RE = re.compile(SOL_STR + r"END\s*WHERE(\s+\w+)?" + EOL_STR, RE_FLAGS) - -# Regular expressions for preprocessor directives - -FYPP_DEF_RE = re.compile(SOL_STR + r"#:DEF\s+", RE_FLAGS) -FYPP_ENDDEF_RE = re.compile(SOL_STR + r"#:ENDDEF", RE_FLAGS) - -FYPP_IF_RE = re.compile(SOL_STR + r"#:IF\s+", RE_FLAGS) -FYPP_ELIF_ELSE_RE = re.compile(SOL_STR + r"#:(ELIF\s+|ELSE)", RE_FLAGS) -FYPP_ENDIF_RE = re.compile(SOL_STR + r"#:ENDIF", RE_FLAGS) - -FYPP_FOR_RE = re.compile(SOL_STR + r"#:FOR\s+", RE_FLAGS) -FYPP_ENDFOR_RE = re.compile(SOL_STR + r"#:ENDFOR", RE_FLAGS) - -FYPP_BLOCK_RE = re.compile(SOL_STR + r"#:BLOCK\s+", RE_FLAGS) -FYPP_ENDBLOCK_RE = re.compile(SOL_STR + r"#:ENDBLOCK", RE_FLAGS) - -FYPP_CALL_RE = re.compile(SOL_STR + r"#:CALL\s+", RE_FLAGS) -FYPP_ENDCALL_RE = re.compile(SOL_STR + r"#:ENDCALL", RE_FLAGS) - -FYPP_MUTE_RE = re.compile(SOL_STR + r"#:MUTE", RE_FLAGS) -FYPP_ENDMUTE_RE = re.compile(SOL_STR + r"#:ENDMUTE", RE_FLAGS) - -PRIVATE_RE = re.compile(SOL_STR + r"PRIVATE\s*::", RE_FLAGS) -PUBLIC_RE = re.compile(SOL_STR + r"PUBLIC\s*::", RE_FLAGS) - -END_RE = re.compile(SOL_STR + r"(END)\s*(IF|DO|SELECT|ASSOCIATE|BLOCK|SUBROUTINE|FUNCTION|MODULE|SUBMODULE|TYPE|PROGRAM|INTERFACE|ENUM|WHERE|FORALL)", RE_FLAGS) - -# intrinsic statements with parenthesis notation that are not functions -INTR_STMTS_PAR = (r"(ALLOCATE|DEALLOCATE|" - r"OPEN|CLOSE|READ|WRITE|" - r"FLUSH|ENDFILE|REWIND|BACKSPACE|INQUIRE|" - r"FORALL|WHERE|ASSOCIATE|NULLIFY)") - -# regular expressions for parsing linebreaks -LINEBREAK_STR = r"(&)[\s]*(?:!.*)?$" - -# regular expressions for parsing operators -# Note: +/- in real literals and sign operator is ignored -PLUSMINUS_RE = re.compile(r"(?<=[\w\)\]])\s*(\+|-)\s*", RE_FLAGS) -# Note: ** or // (or any multiples of * or /) are ignored -# we also ignore any * or / before a :: because we may be seeing 'real*8' -MULTDIV_RE = re.compile( - r"(?<=[\w\)\]])\s*((?(?!=)|>=))\s*(?!\))", - RE_FLAGS) -LOG_OP_RE = re.compile(r"\s*(\.(?:AND|OR|EQV|NEQV)\.)\s*", RE_FLAGS) -PRINT_RE = re.compile(r"(?:(?<=\bPRINT)|(?<=\bREAD))\s*(\*,?)\s*", RE_FLAGS) - -# regular expressions for parsing delimiters -DEL_OPEN_STR = r"(\(\/?|\[)" -DEL_OPEN_RE = re.compile(r"^" + DEL_OPEN_STR, RE_FLAGS) -DEL_CLOSE_STR = r"(\/?\)|\])" -DEL_CLOSE_RE = re.compile(r"^" + DEL_CLOSE_STR, RE_FLAGS) - -# empty line regex -EMPTY_RE = re.compile(SOL_STR + r"$", RE_FLAGS) - -PREPRO_NEW_SCOPE = [parser_re(FYPP_DEF_RE), parser_re(FYPP_IF_RE), parser_re(FYPP_FOR_RE), - parser_re(FYPP_BLOCK_RE), parser_re(FYPP_CALL_RE), parser_re(FYPP_MUTE_RE)] -PREPRO_CONTINUE_SCOPE = [None, parser_re(FYPP_ELIF_ELSE_RE), None, None, None, None] -PREPRO_END_SCOPE = [parser_re(FYPP_ENDDEF_RE), parser_re(FYPP_ENDIF_RE), parser_re(FYPP_ENDFOR_RE), - parser_re(FYPP_ENDBLOCK_RE), parser_re(FYPP_ENDCALL_RE), - parser_re(FYPP_ENDMUTE_RE)] - -class plusminus_parser(parser_re): - """parser for +/- in addition - """ - def __init__(self, regex): - self._re = regex - self._re_excl = re.compile(r"\b(\d+\.?\d*|\d*\.?\d+)[de]" + EOL_STR, RE_FLAGS) - - def split(self, line): - partsplit = self._re.split(line) - partsplit_out = [] - - # exclude splits due to '+/-' in real literals - for n, part in enumerate(partsplit): - if re.search(r"^(\+|-)$", part): - if self._re_excl.search(partsplit[n-1]): - if n==1: partsplit_out = [partsplit[n-1]] - if n + 1 >= len(partsplit) or not partsplit_out: - raise FprettifyParseException("non-standard expression involving + or -",'',0) - partsplit_out[-1] += part + partsplit[n+1] - else: - if n==1: partsplit_out = [partsplit[n-1]] - if n + 1 >= len(partsplit): - raise FprettifyParseException("non-standard expression involving + or -",'',0) - partsplit_out += [part, partsplit[n+1]] - - if not partsplit_out: partsplit_out = partsplit - - return partsplit_out - -# two-sided operators -LR_OPS_RE = [REL_OP_RE, LOG_OP_RE, plusminus_parser(PLUSMINUS_RE), MULTDIV_RE, PRINT_RE] - -USE_RE = re.compile( - SOL_STR + "USE(\s+|(,.+?)?::\s*)\w+?((,.+?=>.+?)+|,\s*only\s*:.+?)?$" + EOL_STR, RE_FLAGS) - -# markups to deactivate formatter -NO_ALIGN_RE = re.compile(SOL_STR + r"&\s*[^\s*]+") - -class where_parser(parser_re): - """parser for where / forall construct - """ - def search(self, line): - match = self._re.search(line) - - if match: - level = 0 - for pos, char in CharFilter(line): - [what_del_open, what_del_close] = get_curr_delim(line, pos) - - if what_del_open: - if what_del_open.group() == r'(': level += 1 - - if what_del_close and what_del_close.group() == r')': - if level == 1: - if EMPTY_RE.search(line[pos+1:]): - return True - else: - return False - else: - level += -1 - - return False - -forall_parser = where_parser - -def build_scope_parser(fypp=True, mod=True): - parser = {} - parser['new'] = \ - [parser_re(IF_RE), parser_re(DO_RE), parser_re(SELCASE_RE), parser_re(SUBR_RE), - parser_re(FCT_RE), - parser_re(INTERFACE_RE), parser_re(TYPE_RE), parser_re(ENUM_RE), parser_re(ASSOCIATE_RE), - None, parser_re(BLK_RE), where_parser(WHERE_RE), forall_parser(FORALL_RE)] - - parser['continue'] = \ - [parser_re(ELSE_RE), None, parser_re(CASE_RE), parser_re(CONTAINS_RE), - parser_re(CONTAINS_RE), - None, parser_re(CONTAINS_RE), None, None, - None, None, parser_re(ELSEWHERE_RE), None] - - parser['end'] = \ - [parser_re(ENDIF_RE), parser_re(ENDDO_RE), parser_re(ENDSEL_RE), parser_re(ENDSUBR_RE), - parser_re(ENDFCT_RE), - parser_re(ENDINTERFACE_RE), parser_re(ENDTYPE_RE), parser_re(ENDENUM_RE), parser_re(ENDASSOCIATE_RE), - parser_re(ENDANY_RE,spec=False), parser_re(ENDBLK_RE), parser_re(ENDWHERE_RE), parser_re(ENDFORALL_RE)] - - if mod: - parser['new'].extend([parser_re(MOD_RE), parser_re(SMOD_RE), parser_re(PROG_RE)]) - parser['continue'].extend([parser_re(CONTAINS_RE), parser_re(CONTAINS_RE), parser_re(CONTAINS_RE)]) - parser['end'].extend([parser_re(ENDMOD_RE), parser_re(ENDSMOD_RE), parser_re(ENDPROG_RE)]) - - if fypp: - parser['new'].extend(PREPRO_NEW_SCOPE) - parser['continue'].extend(PREPRO_CONTINUE_SCOPE) - parser['end'].extend(PREPRO_END_SCOPE) - - return parser - -# match namelist names -NML_RE = re.compile(r"(/\w+/)", RE_FLAGS) -# find namelists and data statements -NML_STMT_RE = re.compile(SOL_STR + r"NAMELIST.*/.*/", RE_FLAGS) -DATA_STMT_RE = re.compile(SOL_STR + r"DATA\s+\w", RE_FLAGS) - -## Regexp for f90 keywords' -F90_KEYWORDS_RE = re.compile(r"\b(" + "|".join(( - "allocatable", "allocate", "assign", "assignment", "backspace", - "block", "call", "case", "character", "close", "common", "complex", - "contains", "continue", "cycle", "data", "deallocate", - "dimension", "do", "double", "else", "elseif", "elsewhere", "end", - "enddo", "endfile", "endif", "entry", "equivalence", "exit", - "external", "forall", "format", "function", "goto", "if", - "implicit", "include", "inquire", "integer", "intent", - "interface", "intrinsic", "logical", "module", "namelist", "none", - "nullify", "only", "open", "operator", "optional", "parameter", - "pause", "pointer", "precision", "print", "private", "procedure", - "program", "public", "read", "real", "recursive", "result", "return", - "rewind", "save", "select", "sequence", "stop", "subroutine", - "target", "then", "type", "use", "where", "while", "write", - ## F95 keywords. - "elemental", "pure", - ## F2003 - "abstract", "associate", "asynchronous", "bind", "class", - "deferred", "enum", "enumerator", "extends", "extends_type_of", - "final", "generic", "import", "non_intrinsic", "non_overridable", - "nopass", "pass", "protected", "same_type_as", "value", "volatile", - ## F2008. - "contiguous", "submodule", "concurrent", "codimension", - "sync all", "sync memory", "critical", "image_index", - )) + r")\b", RE_FLAGS) - -## Regexp whose first part matches F90 intrinsic procedures. -## Add a parenthesis to avoid catching non-procedures. -F90_PROCEDURES_RE = re.compile(r"\b(" + "|".join(( - "abs", "achar", "acos", "adjustl", "adjustr", "aimag", "aint", - "all", "allocated", "anint", "any", "asin", "associated", - "atan", "atan2", "bit_size", "btest", "ceiling", "char", "cmplx", - "conjg", "cos", "cosh", "count", "cshift", "date_and_time", "dble", - "digits", "dim", "dot_product", "dprod", "eoshift", "epsilon", - "exp", "exponent", "floor", "fraction", "huge", "iachar", "iand", - "ibclr", "ibits", "ibset", "ichar", "ieor", "index", "int", "ior", - "ishft", "ishftc", "kind", "lbound", "len", "len_trim", "lge", "lgt", - "lle", "llt", "log", "log10", "logical", "matmul", "max", - "maxexponent", "maxloc", "maxval", "merge", "min", "minexponent", - "minloc", "minval", "mod", "modulo", "mvbits", "nearest", "nint", - "not", "pack", "precision", "present", "product", "radix", - ## Real is taken out here to avoid highlighting declarations. - "random_number", "random_seed", "range", ## "real" - "repeat", "reshape", "rrspacing", "scale", "scan", - "selected_int_kind", "selected_real_kind", "set_exponent", - "shape", "sign", "sin", "sinh", "size", "spacing", "spread", "sqrt", - "sum", "system_clock", "tan", "tanh", "tiny", "transfer", - "transpose", "trim", "ubound", "unpack", "verify", - ## F95 intrinsic functions. - "null", "cpu_time", - ## F2003. - "move_alloc", "command_argument_count", "get_command", - "get_command_argument", "get_environment_variable", - "selected_char_kind", "wait", "flush", "new_line", - "extends", "extends_type_of", "same_type_as", "bind", - ## F2003 ieee_arithmetic intrinsic module. - "ieee_support_underflow_control", "ieee_get_underflow_mode", - "ieee_set_underflow_mode", - ## F2003 iso_c_binding intrinsic module. - "c_loc", "c_funloc", "c_associated", "c_f_pointer", - "c_f_procpointer", - ## F2008. - "bge", "bgt", "ble", "blt", "dshiftl", "dshiftr", "leadz", "popcnt", - "poppar", "trailz", "maskl", "maskr", "shifta", "shiftl", "shiftr", - "merge_bits", "iall", "iany", "iparity", "storage_size", - "bessel_j0", "bessel_j1", "bessel_jn", - "bessel_y0", "bessel_y1", "bessel_yn", - "erf", "erfc", "erfc_scaled", "gamma", "hypot", "log_gamma", - "norm2", "parity", "findloc", "is_contiguous", - "sync images", "lock", "unlock", "image_index", - "lcobound", "ucobound", "num_images", "this_image", - ## F2008 iso_fortran_env module. - "compiler_options", "compiler_version", - ## F2008 iso_c_binding module. - "c_sizeof" - - )) + r")\b", RE_FLAGS) - -F90_MODULES_RE = re.compile(r"\b(" + "|".join(( - ## F2003/F2008 module names - "iso_fortran_env", - "iso_c_binding", - "ieee_exceptions", - "ieee_arithmetic", - "ieee_features" - )) + r")\b", RE_FLAGS) - -## Regexp matching intrinsic operators -F90_OPERATORS_RE = re.compile(r"(" + "|".join([r"\." + a + r"\." for a in ( - "and", "eq", "eqv", "false", "ge", "gt", "le", "lt", "ne", - "neqv", "not", "or", "true" - )]) + r")", RE_FLAGS) - -## Regexp for Fortran intrinsic constants -F90_CONSTANTS_RE = re.compile(r"\b(" + "|".join(( - ## F2003 iso_fortran_env constants. - "input_unit", "output_unit", "error_unit", - "iostat_end", "iostat_eor", - "numeric_storage_size", "character_storage_size", - "file_storage_size", - ## F2003 iso_c_binding constants. - "c_int", "c_short", "c_long", "c_long_long", "c_signed_char", - "c_size_t", - "c_int8_t", "c_int16_t", "c_int32_t", "c_int64_t", - "c_int_least8_t", "c_int_least16_t", "c_int_least32_t", - "c_int_least64_t", - "c_int_fast8_t", "c_int_fast16_t", "c_int_fast32_t", - "c_int_fast64_t", - "c_intmax_t", "c_intptr_t", - "c_float", "c_double", "c_long_double", - "c_float_complex", "c_double_complex", "c_long_double_complex", - "c_bool", "c_char", - "c_null_char", "c_alert", "c_backspace", "c_form_feed", - "c_new_line", "c_carriage_return", "c_horizontal_tab", - "c_vertical_tab", - "c_ptr", "c_funptr", "c_null_ptr", "c_null_funptr", - ## F2008 iso_fortran_env constants. - "character_kinds", "int8", "int16", "int32", "int64", - "integer_kinds", "iostat_inquire_internal_unit", - "logical_kinds", "real_kinds", "real32", "real64", "real128", - "lock_type", "atomic_int_kind", "atomic_logical_kind", - )) + r")\b", RE_FLAGS) - -F90_INT_RE = r"[-+]?[0-9]+" -F90_FLOAT_RE = r"[-+]?([0-9]+\.[0-9]*|\.[0-9]+)" -F90_NUMBER_RE = "(" + F90_INT_RE + "|" + F90_FLOAT_RE + ")" -F90_FLOAT_EXP_RE = F90_NUMBER_RE + r"[eEdD]" + F90_NUMBER_RE -F90_NUMBER_ALL_RE = "(" + F90_NUMBER_RE + "|" + F90_FLOAT_EXP_RE + ")" -F90_NUMBER_ALL_REC = re.compile(F90_NUMBER_ALL_RE, RE_FLAGS) - -## F90_CONSTANTS_TYPES_RE = re.compile(r"\b" + F90_NUMBER_ALL_RE + "_(" + "|".join([a + r"\b" for a in ( -F90_CONSTANTS_TYPES_RE = re.compile( - r"(" + F90_NUMBER_ALL_RE + ")*_(" + "|".join(( - ## F2003 iso_fortran_env constants. - ## F2003 iso_c_binding constants. - "c_int", "c_short", "c_long", "c_long_long", "c_signed_char", - "c_size_t", - "c_int8_t", "c_int16_t", "c_int32_t", "c_int64_t", - "c_int_least8_t", "c_int_least16_t", "c_int_least32_t", - "c_int_least64_t", - "c_int_fast8_t", "c_int_fast16_t", "c_int_fast32_t", - "c_int_fast64_t", - "c_intmax_t", "c_intptr_t", - "c_float", "c_double", "c_long_double", - "c_float_complex", "c_double_complex", "c_long_double_complex", - "c_bool", "c_char", - ## F2008 iso_fortran_env constants. - "character_kinds", "int8", "int16", "int32", "int64", - "integer_kinds", - "logical_kinds", "real_kinds", "real32", "real64", "real128", - "lock_type", "atomic_int_kind", "atomic_logical_kind", - )) + r")\b", RE_FLAGS) - - -class F90Indenter(object): - """ - Parses encapsulation of subunits / scopes line by line - and updates the indentation. - """ - - def __init__(self, scope_parser, first_indent, rel_indent, filename): - # scopes / subunits: - self._scope_storage = [] - # indents for all fortran lines: - self._indent_storage = [] - # indents of actual lines of current fortran line - self._line_indents = [] - - self._parser = scope_parser - - self._filename = filename - self._aligner = F90Aligner(filename) - - # no lines have been processed yet: - self._initial = True - - # implicit scopes: we define implicit scopes, as many as match - # first_indent and rel_indent. This allows for, e.g., a properly - # indented "END FUNCTION" without matching "FUNCTION" statement: - if rel_indent > 0: - for n_impl in range(first_indent % rel_indent, first_indent + 1, rel_indent): - self._indent_storage += [n_impl] - - if not self._indent_storage: - self._indent_storage = [0] - - def process_lines_of_fline(self, f_line, lines, rel_ind, rel_ind_con, - line_nr, indent_fypp=True, manual_lines_indent=None): - """ - Process all lines that belong to a Fortran line `f_line`. - - Impose a relative indent of `rel_ind` for current Fortran line, - and `rel_ind_con` for line continuation. - By default line continuations are auto-aligned by F90Aligner - :param f_line: fortran line - :param lines: actual lines belonging to f_line - :param rel_ind: relative scope indent size for this line - :rel_ind_con: relative continuation indent size for this line - :line_nr: line number - :indent_fypp: whether or not to include fypp preprocessor lines - :manual_lines_indent: don't use F90Aligner but manually impose - indents for continuations - """ - - if (self._initial and - (PROG_RE.match(f_line) or MOD_RE.match(f_line))): - self._indent_storage[-1] = 0 - - self._line_indents = [0] * len(lines) - br_indent_list = [0] * len(lines) - - # local variables to avoid self hassle: - line_indents = self._line_indents - - scopes = self._scope_storage - indents = self._indent_storage - filename = self._filename - - # check statements that start new scope - is_new = False - valid_new = False - - f_filter = CharFilter(f_line, filter_fypp=not indent_fypp) - f_line_filtered = f_filter.filter_all() - - for new_n, newre in enumerate(self._parser['new']): - if newre and newre.search(f_line_filtered) and \ - not self._parser['end'][new_n].search(f_line_filtered): - what_new = new_n - is_new = True - valid_new = True - scopes.append(what_new) - log_message("{}: {}".format(what_new, f_line), - "debug", filename, line_nr) - - # check statements that continue scope - is_con = False - valid_con = False - for con_n, conre in enumerate(self._parser['continue']): - if conre and conre.search(f_line_filtered): - what_con = con_n - is_con = True - log_message("{}: {}".format( - what_con, f_line), "debug", filename, line_nr) - if len(scopes) > 0: - what = scopes[-1] - if what == what_con or indent_fypp: - valid_con = True - - # check statements that end scope - is_end = False - valid_end = False - for end_n, endre in enumerate(self._parser['end']): - if endre and endre.search(f_line_filtered): - what_end = end_n - is_end = True - log_message("{}: {}".format( - what_end, f_line), "debug", filename, line_nr) - if len(scopes) > 0: - what = scopes.pop() - if (what == what_end or not self._parser['end'][what_end].spec - or indent_fypp): - valid_end = True - log_message("{}: {}".format( - what_end, f_line), "debug", filename, line_nr) - else: - valid_end = True - - # fypp preprocessor scopes may be within continuation lines - if indent_fypp and len(lines) > 1 and not FYPP_LINE_RE.search(f_line_filtered): - - for new_n, newre in enumerate(PREPRO_NEW_SCOPE): - for l in lines: - if(newre and newre.search(l)): - is_new = True - valid_new = True - scopes.append(new_n) - - for end_n, endre in enumerate(PREPRO_END_SCOPE): - for l in lines: - if(endre and endre.search(l)): - is_end = True - valid_end = True - if len(scopes) > 0: - what = scopes.pop() - - # deal with line breaks - if not manual_lines_indent: - self._aligner.process_lines_of_fline( - f_line, lines, rel_ind_con, line_nr) - br_indent_list = self._aligner.get_lines_indent() - else: - br_indent_list = manual_lines_indent - - for pos in range(0, len(lines) - 1): - line_indents[pos + 1] = br_indent_list[pos + 1] - - if is_new and not is_end: - if not valid_new: - log_message('invalid scope opening statement', - "info", filename, line_nr) - - line_indents = [ind + indents[-1] for ind in line_indents] - - indents.append(rel_ind + indents[-1]) - - elif (not is_new) and (is_con or is_end): - valid = valid_con if is_con else valid_end - - if not valid: - line_indents = [ind + indents[-1] for ind in line_indents] - log_message('invalid scope closing statement', - "info", filename, line_nr) - else: - if len(indents) > 1 or self._initial: - line_indents = [ind + indents[-2 + self._initial] - for ind in line_indents] - - if is_end and valid: - if len(indents) > 1: - indents.pop() - else: - indents[-1] = 0 - - else: - line_indents = [ind + indents[-1] for ind in line_indents] - - # we have processed first line: - self._initial = False - - # reassigning self.* to the updated variables - self._line_indents = line_indents - self._scope_storage = scopes - self._indent_storage = indents - - def get_fline_indent(self): - """after processing, retrieve the indentation of the full Fortran line.""" - return self._indent_storage[-1] - - def get_lines_indent(self): - """after processing, retrieve the indents of all line parts.""" - return self._line_indents - - -class F90Aligner(object): - """ - Alignment of continuations of a broken line, - based on the following heuristics: - - if line break in brackets - We are parsing the level of nesting - and align to most inner bracket delimiter. - - else if line is an assignment - alignment to '=' or '=>'. - note: assignment operator recognized as any '=' that is not - part of another operator and that is not enclosed in bracket - - else if line is a declaration - alignment to '::' - - else - default indent - """ - - def __init__(self, filename): - self._filename = filename - self.__init_line(0) - - def __init_line(self, line_nr): - """initialization before processing new line""" - self._line_nr = line_nr - self._line_indents = [0] - self._level = 0 - self._br_indent_list = [0] - - def process_lines_of_fline(self, f_line, lines, rel_ind, line_nr): - """ - process all lines that belong to a Fortran line `f_line`, - `rel_ind` is the relative indentation size. - """ - - self.__init_line(line_nr) - - is_decl = VAR_DECL_RE.search(f_line) or PUBLIC_RE.search(f_line) or PRIVATE_RE.match(f_line) - is_use = USE_RE.search(f_line) - for pos, line in enumerate(lines): - self.__align_line_continuations( - line, is_decl, is_use, rel_ind, self._line_nr + pos) - if pos + 1 < len(lines): - self._line_indents.append(self._br_indent_list[-1]) - - if len(self._br_indent_list) > 2 or self._level: - log_message('unpaired bracket delimiters', - "info", self._filename, self._line_nr) - - def get_lines_indent(self): - """after processing, retrieve the indents of all line parts.""" - return self._line_indents - - def __align_line_continuations(self, line, is_decl, is_use, indent_size, line_nr): - """align continuation lines.""" - - indent_list = self._br_indent_list - level = self._level - filename = self._filename - - pos_eq = 0 - pos_ldelim = [] - pos_rdelim = [] - ldelim = [] - rdelim = [] - - # find delimiters that are not ended on this line. - # find proper alignment to most inner delimiter - # or alignment to assignment operator - rel_ind = indent_list[-1] # indentation of prev. line - - end_of_delim = -1 - - for pos, char in CharFilter(line): - - what_del_open = None - what_del_close = None - if pos > end_of_delim: - [what_del_open, what_del_close] = get_curr_delim(line, pos) - - if what_del_open: - what_del_open = what_del_open.group() - end_of_delim = pos + len(what_del_open) - 1 - level += 1 - indent_list.append(pos + len(what_del_open) + rel_ind) - pos_ldelim.append(pos) - ldelim.append(what_del_open) - if what_del_close: - what_del_close = what_del_close.group() - end_of_delim = pos + len(what_del_close) - 1 - if level > 0: - level += -1 - indent_list.pop() - else: - log_message('unpaired bracket delimiters', - "info", filename, line_nr) - - if pos_ldelim: - pos_ldelim.pop() - what_del_open = ldelim.pop() - valid = False - if what_del_open == r"(": - valid = what_del_close == r")" - if what_del_open == r"(/": - valid = what_del_close == r"/)" - if what_del_open == r"[": - valid = what_del_close == r"]" - if not valid: - log_message('unpaired bracket delimiters', - "info", filename, line_nr) - - else: - pos_rdelim.append(pos) - rdelim.append(what_del_close) - if char == ',' and not level and pos_eq > 0: - # a top level comma removes previous alignment position. - # (see issue #11) - pos_eq = 0 - indent_list.pop() - if not level and not is_decl and char == '=' and not REL_OP_RE.search( - line[max(0, pos - 1):min(pos + 2, len(line))]): - # should only have one assignment per line! - if pos_eq > 0: - raise FprettifyInternalException( - "found more than one assignment in the same Fortran line", filename, line_nr) - is_pointer = line[pos + 1] == '>' - pos_eq = pos + 1 - # don't align if assignment operator directly before - # line break - if not re.search(r"=>?\s*" + LINEBREAK_STR, line, - RE_FLAGS): - indent_list.append( - pos_eq + 1 + is_pointer + indent_list[-1]) - elif is_decl and line[pos:pos + 2] == '::' and not re.search(r"::\s*" + LINEBREAK_STR, line, RE_FLAGS): - indent_list.append(pos + 3 + indent_list[-1]) - elif is_use and line[pos] == ':' and not re.search(r":\s*" + LINEBREAK_STR, line, RE_FLAGS): - indent_list.append(pos + 2 + indent_list[-1]) - - # Don't align if delimiter opening directly before line break - if level and re.search(DEL_OPEN_STR + r"\s*" + LINEBREAK_STR, line, - RE_FLAGS): - if len(indent_list) > 1: - indent_list[-1] = indent_list[-2] - else: - indent_list[-1] = 0 - - if not indent_list[-1]: - indent_list[-1] = indent_size - - self._level = level - - -def inspect_ffile_format(infile, indent_size, strict_indent, indent_fypp=False, orig_filename=None): - """ - Determine indentation by inspecting original Fortran file. - - This is mainly for finding aligned blocks of DO/IF statements. - Also check if it has f77 constructs. - :param infile: open file - :param indent_size: the default indent size - :orig_filename: filename used for messages - :returns: [ target indent sizes for each line, - indent of first line (offset) ] - """ - if not orig_filename: - orig_filename = infile.name - - num_labels = False - indents = [] - stream = InputStream(infile, filter_fypp=not indent_fypp, orig_filename=orig_filename) - prev_offset = 0 - first_indent = -1 - has_fypp = False - - while 1: - f_line, _, lines = stream.next_fortran_line() - if not lines: - break - - if FYPP_LINE_RE.search(f_line): has_fypp = True - - f_line, lines, label = preprocess_labels(f_line, lines) - - offset = len(lines[0]) - len(lines[0].lstrip(' ')) - if f_line.strip() and first_indent == -1: - first_indent = offset - indents.append(offset - prev_offset) - - # don't impose indentation for blocked do/if constructs: - if (IF_RE.search(f_line) or DO_RE.search(f_line)): - if (prev_offset != offset or strict_indent): - indents[-1] = indent_size - else: - indents[-1] = indent_size - - prev_offset = offset - - return indents, first_indent, has_fypp - - -def replace_relational_single_fline(f_line, cstyle): - """ - format a single Fortran line - replaces scalar relational - operators in logical expressions to either Fortran or C-style. - .lt. <--> < - .le. <--> <= - .gt. <--> > - .ge. <--> >= - .eq. <--> == - .ne. <--> /= - """ - - new_line = f_line - - # only act on lines that do contain a relation - if REL_OP_RE.search(f_line): - # check that relation is not inside quotes, a string, or commented - # (think of underlining a heading with === or things like markup being printed which we do not replace) - pos_prev = -1 - pos = -1 - line_parts = [''] - for pos, char in CharFilter(f_line): - if pos > pos_prev + 1: # skipped string - line_parts.append(f_line[pos_prev + 1:pos].strip()) # append string - line_parts.append('') - - line_parts[-1] += char - - pos_prev = pos - - if pos + 1 < len(f_line): - line_parts.append(f_line[pos + 1:]) - - for pos, part in enumerate(line_parts): - # exclude comments, strings: - if not STR_OPEN_RE.match(part): - # also exclude / if we see a namelist and data statement - if cstyle: - part = re.sub(r"\.LT\.", "< ", part, flags=RE_FLAGS) - part = re.sub(r"\.LE\.", "<= ", part, flags=RE_FLAGS) - part = re.sub(r"\.GT\.", "> ", part, flags=RE_FLAGS) - part = re.sub(r"\.GE\.", ">= ", part, flags=RE_FLAGS) - part = re.sub(r"\.EQ\.", "== ", part, flags=RE_FLAGS) - part = re.sub(r"\.NE\.", "/= ", part, flags=RE_FLAGS) - else: - part = re.sub(r"<=", ".le.", part, flags=RE_FLAGS) - part = re.sub(r"<", ".lt.", part, flags=RE_FLAGS) - part = re.sub(r">=", ".ge.", part, flags=RE_FLAGS) - part = re.sub(r">", ".gt.", part, flags=RE_FLAGS) - part = re.sub(r"==", ".eq.", part, flags=RE_FLAGS) - part = re.sub(r"\/=", ".ne.", part, flags=RE_FLAGS) - - line_parts[pos] = part - - new_line = ''.join(line_parts) - - return new_line - - -def replace_keywords_single_fline(f_line, case_dict): - """ - format a single Fortran line - change case of keywords - """ - - new_line = f_line - - # Collect words list - pos_prev = -1 - pos = -1 - line_parts = [''] - for pos, char in CharFilter(f_line): - if pos > pos_prev + 1: # skipped string - line_parts.append(f_line[pos_prev + 1:pos].strip()) # append string - line_parts.append('') - - line_parts[-1] += char - - pos_prev = pos - - if pos + 1 < len(f_line): - line_parts.append(f_line[pos + 1:]) - - line_parts = [[a] if STR_OPEN_RE.match(a) else re.split(F90_OPERATORS_RE,a) - for a in line_parts] # problem, split "." - line_parts = [b for a in line_parts for b in a] - - ## line_parts = [[a] if STR_OPEN_RE.match(a) else re.split('(\W)',a) - ## for a in line_parts] # problem, split "." - line_parts = [[a] if STR_OPEN_RE.match(a) - else re.split('([^a-zA-Z0-9_.])',a) - for a in line_parts] - line_parts = [b for a in line_parts for b in a] - - swapcase = lambda s, a: s if a==0 else (s.lower() if a==1 else s.upper()) - - nbparts = len(line_parts) - for pos, part in enumerate(line_parts): - # exclude comments, strings: - if part.strip() and not STR_OPEN_RE.match(part): - if F90_KEYWORDS_RE.match(part): - part = swapcase(part, case_dict['keywords']) - elif F90_MODULES_RE.match(part): - part = swapcase(part, case_dict['procedures']) - elif F90_PROCEDURES_RE.match(part): - ok = False - for pos2 in range(pos+1, nbparts): - part2 = line_parts[pos2] - if part2.strip() and not (part2 == '\n' or STR_OPEN_RE.match(part2)): - ok = (part2 == '(') - break - if ok: - part = swapcase(part, case_dict['procedures']) - elif F90_OPERATORS_RE.match(part): - part = swapcase(part, case_dict['operators']) - elif F90_CONSTANTS_RE.match(part): - part = swapcase(part, case_dict['constants']) - elif F90_CONSTANTS_TYPES_RE.match(part): - part = swapcase(part, case_dict['constants']) - elif F90_NUMBER_ALL_REC.match(part): - part = swapcase(part, case_dict['constants']) - - line_parts[pos] = part - - new_line = ''.join(line_parts) - - return new_line - - -def format_single_fline(f_line, whitespace, whitespace_dict, linebreak_pos, - ampersand_sep, scope_parser, format_decl, filename, line_nr, - auto_format=True): - """ - format a single Fortran line - imposes white space formatting - and inserts linebreaks. - Takes a logical Fortran line `f_line` as input as well as the positions - of the linebreaks (`linebreak_pos`), and the number of - separating whitespace characters before ampersand (`ampersand_sep`). - `filename` and `line_nr` just for error messages. - The higher `whitespace`, the more white space characters inserted - - whitespace = 0, 1, 2, 3 are currently supported. - whitespace formatting can additionally controlled more fine-grained - via a dictionary of bools (whitespace_dict) - auto formatting can be turned off by setting `auto_format` to False. - """ - - # define whether to put whitespaces around operators: - mapping = { - 'comma': 0, # 0: comma, semicolon - 'assignments': 1, # 1: assignment operators - 'relational': 2, # 2: relational operators - 'logical': 3, # 3: logical operators - 'plusminus': 4, # 4: arithm. operators plus and minus - 'multdiv': 5, # 5: arithm. operators multiply and divide - 'print': 6, # 6: print / read statements - 'type': 7, # 7: select type components - 'intrinsics': 8, # 8: intrinsics - 'decl': 9 # 9: declarations - } - - if whitespace == 0: - spacey = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - elif whitespace == 1: - spacey = [1, 1, 1, 1, 0, 0, 1, 0, 1, 1] - elif whitespace == 2: - spacey = [1, 1, 1, 1, 1, 0, 1, 0, 1, 1] - elif whitespace == 3: - spacey = [1, 1, 1, 1, 1, 1, 1, 0, 1, 1] - elif whitespace == 4: - spacey = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - else: - raise NotImplementedError("unknown value for whitespace") - - if whitespace_dict: - # iterate over dictionary and override settings for 'spacey' - for key, value in mapping.items(): - if whitespace_dict[key] == True: - spacey[value] = 1 - elif whitespace_dict[key] == False: - spacey[value] = 0 - - line = f_line - line_orig = line - - if auto_format: - - line = rm_extra_whitespace(line, format_decl) - line = add_whitespace_charwise(line, spacey, scope_parser, format_decl, filename, line_nr) - line = add_whitespace_context(line, spacey) - - lines_out = split_reformatted_line( - line_orig, linebreak_pos, ampersand_sep, line, filename, line_nr) - return lines_out - - -def rm_extra_whitespace(line, format_decl): - """rm all unneeded whitespace chars, except for declarations""" - line_ftd = '' - pos_prev = -1 - pos = -1 - for pos, char in CharFilter(line): - if format_decl: - is_decl = False - else: - is_decl = line[pos:].lstrip().startswith('::') or line[ - :pos].rstrip().endswith('::') - - if pos > pos_prev + 1: # skipped string - line_ftd = line_ftd + line[pos_prev + 1:pos] - - if char == ' ': - # remove double spaces: - if line_ftd and (re.search(r'[\w]', line_ftd[-1]) or is_decl): - line_ftd = line_ftd + char - else: - if (line_ftd and line_ftd[-1] == ' ' and - (not re.search(r'[\w]', char) and not is_decl)): - line_ftd = line_ftd[:-1] # remove spaces except between words - line_ftd = line_ftd + char - pos_prev = pos - - line_ftd = line_ftd + line[pos+1:] - return line_ftd - - -def add_whitespace_charwise(line, spacey, scope_parser, format_decl, filename, line_nr): - """add whitespace character wise (no need for context aware parsing)""" - line_ftd = line - pos_eq = [] - end_of_delim = -1 - level = 0 - for pos, char in CharFilter(line): - # offset w.r.t. unformatted line - offset = len(line_ftd) - len(line) - - # format delimiters - what_del_open = None - what_del_close = None - if pos > end_of_delim: - [what_del_open, what_del_close] = get_curr_delim(line, pos) - - if what_del_open or what_del_close: - sep1 = 0 - sep2 = 0 - - if what_del_open: - delim = what_del_open.group() - else: - delim = what_del_close.group() - - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + len(delim) + offset:] - - # format opening delimiters - if what_del_open: - level += 1 # new scope - # add separating whitespace before opening delimiter - # with some exceptions: - # FIXME: duplication of regex, better to include them into - # INTR_STMTS_PAR - if ((not re.search((r"(" + DEL_OPEN_STR + - r"|[\w\*/=\+\-:])\s*$"), - line[:pos], RE_FLAGS) and - not EMPTY_RE.search(line[:pos])) or - re.search(SOL_STR + r"(\w+\s*:)?(ELSE)?\s*IF\s*$", - line[:pos], RE_FLAGS) or - re.search(SOL_STR + r"(\w+\s*:)?\s*DO\s+WHILE\s*$", - line[:pos], RE_FLAGS) or - re.search(SOL_STR + r"(SELECT)?\s*CASE\s*$", - line[:pos], RE_FLAGS) or - re.search(SOL_STR + r"(SELECT)?\s*RANK\s*$", - line[:pos], RE_FLAGS) or - re.search(SOL_STR + r"SELECT\s*TYPE\s*$", - line[:pos], RE_FLAGS) or - re.search(SOL_STR + r"CLASS\s*DEFAULT\s*$", - line[:pos], RE_FLAGS) or - re.search(SOL_STR + r"(TYPE|CLASS)\s+IS\s*$", - line[:pos], RE_FLAGS) or - re.search(r"(? 0: - level += -1 # close scope - else: - log_message('unpaired bracket delimiters', - "info", filename, line_nr) - - # add separating whitespace after closing delimiter - # with some exceptions: - if not re.search(r"^\s*(" + DEL_CLOSE_STR + r"|[,%:/\*])", - line[pos + 1:], RE_FLAGS): - sep2 = 1 - elif re.search(r"^\s*::", line[pos + 1:], RE_FLAGS): - sep2 = len(rhs) - len(rhs.lstrip(' ')) if not format_decl else 1 - - # where delimiter token ends - end_of_delim = pos + len(delim) - 1 - - line_ftd = lhs.rstrip(' ') + ' ' * sep1 + \ - delim + ' ' * sep2 + rhs.lstrip(' ') - - # format commas and semicolons - if char in [',', ';']: - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + offset:] - line_ftd = lhs.rstrip(' ') + char + ' ' * \ - spacey[0] + rhs.lstrip(' ') - line_ftd = line_ftd.rstrip(' ') - - # format type selector % - if char == "%": - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + offset:] - line_ftd = lhs.rstrip(' ') \ - + ' ' * spacey[7] \ - + char \ - + ' ' * spacey[7] \ - + rhs.lstrip(' ') - line_ftd = line_ftd.rstrip(' ') - - # format '::' - if format_decl and line[pos:pos+2] == "::": - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 2 + offset:] - line_ftd = lhs.rstrip(' ') \ - + ' ' * spacey[9] \ - + '::' + ' ' * spacey[9] \ - + rhs.lstrip(' ') - line_ftd = line_ftd.rstrip(' ') - - # format .NOT. - if re.search(r"^\.NOT\.", line[pos:pos + 5], RE_FLAGS): - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 5 + offset:] - line_ftd = lhs.rstrip( - ' ') + line[pos:pos + 5] + ' ' * spacey[3] + rhs.lstrip(' ') - - # strip whitespaces from '=' and prepare assignment operator - # formatting: - if char == '=' and not REL_OP_RE.search(line[pos - 1:pos + 2]): - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + offset:] - line_ftd = lhs.rstrip(' ') + '=' + rhs.lstrip(' ') - is_pointer = line[pos + 1] == '>' - if (not level) or is_pointer: # remember position of assignment operator - pos_eq.append(len(lhs.rstrip(' '))) - - line = line_ftd - - for pos in pos_eq: - offset = len(line_ftd) - len(line) - is_pointer = line[pos + 1] == '>' - lhs = line_ftd[:pos + offset] - rhs = line_ftd[pos + 1 + is_pointer + offset:] - if is_pointer: - assign_op = '=>' # pointer assignment - else: - assign_op = '=' # assignment - line_ftd = (lhs.rstrip(' ') + - ' ' * spacey[1] + assign_op + - ' ' * spacey[1] + rhs.lstrip(' ')) - # offset w.r.t. unformatted line - - is_end = False - if END_RE.search(line_ftd): - for endre in scope_parser['end']: - if endre and endre.search(line_ftd): - is_end = True - if is_end: - line_ftd = END_RE.sub(r'\1' + ' '*spacey[8] + r'\2', line_ftd) - - if level != 0: - log_message('unpaired bracket delimiters', "info", filename, line_nr) - - return line_ftd - - -def add_whitespace_context(line, spacey): - """ - for context aware whitespace formatting we extract line parts that are - not comments or strings in order to be able to apply a context aware regex. - """ - - - pos_prev = -1 - pos = -1 - line_parts = [''] - for pos, char in CharFilter(line): - if pos > pos_prev + 1: # skipped string - line_parts.append(line[pos_prev + 1:pos].strip()) # append string - line_parts.append('') - - line_parts[-1] += char - - pos_prev = pos - - if pos + 1 < len(line): - line_parts.append(line[pos + 1:]) - - # format namelists with spaces around / - if NML_STMT_RE.match(line): - for pos, part in enumerate(line_parts): - # exclude comments, strings: - if not STR_OPEN_RE.match(part): - partsplit = NML_RE.split(part) - line_parts[pos] = (' '.join(partsplit)) - - # Two-sided operators - for n_op, lr_re in enumerate(LR_OPS_RE): - for pos, part in enumerate(line_parts): - # exclude comments, strings: - if not STR_OPEN_RE.match(part): - # also exclude / if we see a namelist and data statement - if not ( NML_STMT_RE.match(line) or DATA_STMT_RE.match(line) ): - partsplit = lr_re.split(part) - line_parts[pos] = (' ' * spacey[n_op + 2]).join(partsplit) - - line = ''.join(line_parts) - - for newre in [IF_RE, DO_RE, BLK_RE]: - if newre.search(line) and re.search(SOL_STR + r"\w+\s*:", line): - line = ': '.join(_.strip() for _ in line.split(':', 1)) - - # format ':' for labels and use only statements - if USE_RE.search(line): - line = re.sub(r'(only)\s*:\s*', r'\g<1>:' + ' ' * - spacey[0], line, flags=RE_FLAGS) - - return line - - -def split_reformatted_line(line_orig, linebreak_pos_orig, ampersand_sep, line, filename, line_nr): - """ - Infer linebreak positions of formatted line from linebreak positions in - original line and split line. - """ - # shift line break positions from original to reformatted line - pos_new = 0 - pos_old = 0 - linebreak_pos_orig.sort(reverse=True) - linebreak_pos_ftd = [] - while 1: - - if pos_new == len(line) or pos_old == len(line_orig): - break - - if line[pos_new] != line_orig[pos_old]: - raise FprettifyInternalException( - "failed at finding line break position", filename, line_nr) - - if linebreak_pos_orig and pos_old > linebreak_pos_orig[-1]: - linebreak_pos_orig.pop() - linebreak_pos_ftd.append(pos_new) - continue - - pos_new += 1 - while pos_new < len(line) and line[pos_new] == ' ': - pos_new += 1 - - pos_old += 1 - while pos_old < len(line_orig) and line_orig[pos_old] == ' ': - pos_old += 1 - - linebreak_pos_ftd.insert(0, 0) - - # We split line into parts and we insert ampersands at line end, but not - # for empty lines and comment lines - lines_split = [(line[l:r].rstrip(' ') + - ' ' * ampersand_sep[pos] + '&' * min(1, r - l)) - for pos, (l, r) in enumerate(zip(linebreak_pos_ftd[0:-1], - linebreak_pos_ftd[1:]))] - - lines_split.append(line[linebreak_pos_ftd[-1]:]) - - return lines_split - - -def diff(a, b, a_name, b_name): - # type: (str, str, str, str) -> str - - """Return a unified diff string between strings `a` and `b`.""" - import difflib - - a_lines = [line + "\n" for line in a.splitlines()] - b_lines = [line + "\n" for line in b.splitlines()] - return "".join( - difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5) - ) - -def reformat_inplace(filename, stdout=False, diffonly=False, **kwargs): # pragma: no cover - """reformat a file in place.""" - if filename == '-': - infile = io.StringIO() - infile.write(sys.stdin.read()) - else: - infile = io.open(filename, 'r', encoding='utf-8') - - newfile = io.StringIO() - reformat_ffile(infile, newfile, - orig_filename=filename, **kwargs) - - if diffonly: - infile.seek(0) - newfile.seek(0) - diff_contents=diff(infile.read(),newfile.read(),filename,filename) - sys.stdout.write(diff_contents) - else: - - if stdout: - sys.stdout.write(newfile.getvalue()) - else: - outfile = io.open(filename, 'r', encoding='utf-8') - - # write to outfile only if content has changed - - import hashlib - hash_new = hashlib.md5() - hash_new.update(newfile.getvalue().encode('utf-8')) - hash_old = hashlib.md5() - hash_old.update(outfile.read().encode('utf-8')) - - outfile.close() - - if hash_new.digest() != hash_old.digest(): - outfile = io.open(filename, 'w', encoding='utf-8') - outfile.write(newfile.getvalue()) - -def reformat_ffile(infile, outfile, impose_indent=True, indent_size=3, strict_indent=False, impose_whitespace=True, - case_dict={}, - impose_replacements=False, cstyle=False, whitespace=2, whitespace_dict={}, llength=132, - strip_comments=False, format_decl=False, orig_filename=None, indent_fypp=True, indent_mod=True): - """main method to be invoked for formatting a Fortran file.""" - - # note: whitespace formatting and indentation may require different parsing rules - # (e.g. preprocessor statements may be indented but not whitespace formatted) - # therefore we invoke reformat_ffile independently for: - # 1) whitespace formatting - # 2) indentation - - if not orig_filename: - orig_filename = infile.name - - # 1) whitespace formatting - oldfile = infile - newfile = infile - - if impose_whitespace: - _impose_indent = False - - newfile = io.StringIO() - reformat_ffile_combined(oldfile, newfile, _impose_indent, indent_size, strict_indent, impose_whitespace, - case_dict, - impose_replacements, cstyle, whitespace, whitespace_dict, llength, - strip_comments, format_decl, orig_filename, indent_fypp, indent_mod) - oldfile = newfile - - # 2) indentation - if impose_indent: - - _impose_whitespace = False - _impose_replacements = False - - newfile = io.StringIO() - reformat_ffile_combined(oldfile, newfile, impose_indent, indent_size, strict_indent, _impose_whitespace, - case_dict, - _impose_replacements, cstyle, whitespace, whitespace_dict, llength, - strip_comments, format_decl, orig_filename, indent_fypp, indent_mod) - - - outfile.write(newfile.getvalue()) - - -def reformat_ffile_combined(infile, outfile, impose_indent=True, indent_size=3, strict_indent=False, impose_whitespace=True, - case_dict={}, - impose_replacements=False, cstyle=False, whitespace=2, whitespace_dict={}, llength=132, - strip_comments=False, format_decl=False, orig_filename=None, indent_fypp=True, indent_mod=True): - - if not orig_filename: - orig_filename = infile.name - - if not impose_indent: - indent_fypp = False - - - infile.seek(0) - req_indents, first_indent, has_fypp = inspect_ffile_format( - infile, indent_size, strict_indent, indent_fypp, orig_filename) - infile.seek(0) - - if not has_fypp: indent_fypp = False - - scope_parser = build_scope_parser(fypp=indent_fypp, mod=indent_mod) - - # initialization - - # special cases for indentation: - # indent_special = 0: parse syntax and impose indent - # indent_special = 1: no indentation - # indent_special = 2: use indent from previous line - # indent_special = 3: take indent from input file (leave as is) - indent_special = 0 - - if impose_indent: - indenter = F90Indenter(scope_parser, first_indent, indent_size, orig_filename) - else: - indent_special = 3 - - impose_case = not all(v == 0 for v in case_dict.values()) - - nfl = 0 # fortran line counter - use_same_line = False - stream = InputStream(infile, not indent_fypp, orig_filename=orig_filename) - skip_blank = False - in_format_off_block = False - - while 1: - f_line, comments, lines = stream.next_fortran_line() - - if not lines: - break - - nfl += 1 - orig_lines = lines - - f_line, lines, is_omp_conditional = preprocess_omp( - f_line, lines) - f_line, lines, label = preprocess_labels(f_line, lines) - - if indent_special != 3: - indent = [0] * len(lines) - else: - indent = [len(l) - len((l.lstrip(' ')).lstrip('&')) for l in lines] - - comment_lines = format_comments(lines, comments, strip_comments) - - auto_align, auto_format, in_format_off_block = parse_fprettify_directives( - lines, comment_lines, in_format_off_block, orig_filename, stream.line_nr) - - lines, do_format, prev_indent, is_blank, is_special = preprocess_line( - f_line, lines, comments, orig_filename, stream.line_nr, indent_fypp) - - if is_special[0]: - indent_special = 3 - - if prev_indent and indent_special == 0: - indent_special = 2 - - if is_blank and skip_blank: - continue - if (not do_format): - if indent_special == 2: - # inherit indent from previous line - indent[:] = [indenter.get_fline_indent()]*len(indent) - elif indent_special == 0: - indent_special = 1 - else: - - if not auto_align: - manual_lines_indent = get_manual_alignment(lines) - else: - manual_lines_indent = [] - - lines, pre_ampersand, ampersand_sep = remove_pre_ampersands( - lines, is_special, orig_filename, stream.line_nr) - - linebreak_pos = get_linebreak_pos(lines, filter_fypp=not indent_fypp) - - f_line = f_line.strip(' ') - - if impose_replacements: - f_line = replace_relational_single_fline(f_line, cstyle) - - if impose_case: - f_line = replace_keywords_single_fline(f_line, case_dict) - - if impose_whitespace: - lines = format_single_fline( - f_line, whitespace, whitespace_dict, linebreak_pos, ampersand_sep, - scope_parser, format_decl, orig_filename, stream.line_nr, auto_format) - - lines = append_comments(lines, comment_lines, is_special) - - # target indent for next line - rel_indent = req_indents[nfl] if nfl < len(req_indents) else 0 - - if indent_special != 3: - indenter.process_lines_of_fline( - f_line, lines, rel_indent, indent_size, - stream.line_nr, indent_fypp, manual_lines_indent) - indent = indenter.get_lines_indent() - - lines, indent = prepend_ampersands(lines, indent, pre_ampersand) - - if any(is_special): - for pos, line in enumerate(lines): - if is_special[pos]: - indent[pos] = len(line) - len(line.lstrip(' ')) - lines[pos] = line.lstrip(' ') - - lines = remove_trailing_whitespace(lines) - - # need to shift indents if label wider than first indent - if label and impose_indent: - if indent[0] < len(label): - indent = [ind + len(label) - indent[0] for ind in indent] - - write_formatted_line(outfile, indent, lines, orig_lines, indent_special, llength, - use_same_line, is_omp_conditional, label, orig_filename, stream.line_nr) - - do_indent, use_same_line = pass_defaults_to_next_line(f_line) - - if impose_indent: - if do_indent: - indent_special = 0 - else: - indent_special = 1 - - # rm subsequent blank lines - skip_blank = EMPTY_RE.search( - f_line) and not any(comments) and not is_omp_conditional and not label - - -def format_comments(lines, comments, strip_comments): - comments_ftd = [] - for line, comment in zip(lines, comments): - has_comment = bool(comment.strip()) - if has_comment: - if strip_comments: - sep = not comment.strip() == line.strip() - else: - line_minus_comment = line.replace(comment,"") - sep = len(line_minus_comment.rstrip('\n')) - len(line_minus_comment.rstrip()) - else: - sep = 0 - - if line.strip(): # empty lines between linebreaks are ignored - comments_ftd.append(' ' * sep + comment.strip()) - return comments_ftd - - -def parse_fprettify_directives(lines, comment_lines, in_format_off_block, filename, line_nr): - """ - parse formatter directives '!&' and line continuations starting with an - ampersand. - """ - auto_align = not any(NO_ALIGN_RE.search(_) for _ in lines) - auto_format = not (in_format_off_block or any( - _.lstrip().startswith('!&') for _ in comment_lines)) - if not auto_format: - auto_align = False - if (len(lines)) == 1: - valid_directive = True - if lines[0].strip().startswith('!&<'): - if in_format_off_block: - valid_directive = False - else: - in_format_off_block = True - if lines[0].strip().startswith('!&>'): - if not in_format_off_block: - valid_directive = False - else: - in_format_off_block = False - if not valid_directive: - raise FprettifyParseException( - FORMATTER_ERROR_MESSAGE, filename, line_nr) - - return [auto_align, auto_format, in_format_off_block] - - -def preprocess_omp(f_line, lines): - """convert omp conditional to normal fortran""" - - is_omp_conditional = bool(OMP_COND_RE.search(f_line)) - if is_omp_conditional: - f_line = OMP_COND_RE.sub(' ', f_line, count=1) - lines = [OMP_COND_RE.sub(' ', l, count=1) for l in lines] - - return [f_line, lines, is_omp_conditional] - -def preprocess_labels(f_line, lines): - """remove statement labels""" - - match = STATEMENT_LABEL_RE.search(f_line) - if match: - label = match.group(1) - else: - label = '' - - if label: - f_line = STATEMENT_LABEL_RE.sub(len(label)*' ', f_line, count=1) - lines[0] = STATEMENT_LABEL_RE.sub(len(label)*' ', lines[0], count=1) - - return [f_line, lines, label] - -def preprocess_line(f_line, lines, comments, filename, line_nr, indent_fypp): - """preprocess lines: identification and formatting of special cases""" - is_blank = False - prev_indent = False - do_format = False - - # is_special: special directives that should not be treated as Fortran - # currently supported: fypp preprocessor directives or comments for FORD documentation - is_special = [False]*len(lines) - - for pos, line in enumerate(lines): - line_strip = line.lstrip() - if indent_fypp: - is_special[pos] = line_strip.startswith('!!') or \ - (FYPP_LINE_RE.search(line_strip) if pos > 0 else False) - else: - is_special[pos] = FYPP_LINE_RE.search(line_strip) or line_strip.startswith('!!') - - # if first line is special, all lines should be special - if is_special[0]: is_special = [True]*len(lines) - - if EMPTY_RE.search(f_line): # empty lines including comment lines - if any(comments): - if lines[0].startswith(' ') and not OMP_DIR_RE.search(lines[0]): - # indent comment lines only if they were not indented before. - prev_indent = True - else: - is_blank = True - lines = [l.strip(' ') if not is_special[n] else l for n, l in enumerate(lines)] - else: - do_format = True - - return [lines, do_format, prev_indent, is_blank, is_special] - - -def pass_defaults_to_next_line(f_line): - """defaults to be transferred from f_line to next line""" - if re.search(r";\s*$", f_line, RE_FLAGS): - # if line ended with semicolon, don't indent next line - do_indent = False - use_same_line = True - else: - do_indent = True - use_same_line = False - - return [do_indent, use_same_line] - - -def remove_trailing_whitespace(lines): - """remove trailing whitespaces from lines""" - lines = [re.sub(r"\s+$", '\n', l, RE_FLAGS) - for l in lines] - return lines - - -def prepend_ampersands(lines, indent, pre_ampersand): - """prepend ampersands and correct indent""" - for pos, line in enumerate(lines): - amp_insert = pre_ampersand[pos] - if amp_insert: - indent[pos] += -1 - lines[pos] = amp_insert + line.lstrip() - - return [lines, indent] - - -def append_comments(lines, comment_lines, is_special): - """append comments to lines""" - for pos, (line, comment) in enumerate(zip(lines, comment_lines)): - if pos < len(lines) - 1: - has_nl = True # has next line - if not line.strip() and not is_special[pos]: comment = comment.lstrip() - else: - has_nl = not re.search(EOL_SC, line) - lines[pos] = lines[pos].rstrip(' ') + comment + '\n' * has_nl - - return lines - - -def get_linebreak_pos(lines, filter_fypp=True): - """extract linebreak positions in Fortran line from lines""" - linebreak_pos = [] - if filter_fypp: - notfortran_re = NOTFORTRAN_LINE_RE - else: - notfortran_re = NOTFORTRAN_FYPP_LINE_RE - - for line in lines: - found = None - for char_pos, _ in CharFilter(line, filter_strings=False): - if re.match(LINEBREAK_STR, line[char_pos:], RE_FLAGS): - found = char_pos - if found: - linebreak_pos.append(found) - elif notfortran_re.search(line.lstrip(' ')): - linebreak_pos.append(0) - - linebreak_pos = [sum(linebreak_pos[0:_ + 1]) - - 1 for _ in range(0, len(linebreak_pos))] - - return linebreak_pos - - -def remove_pre_ampersands(lines, is_special, filename, line_nr): - """ - remove and return preceding ampersands ('pre_ampersand'). Also return - number of whitespace characters before ampersand of previous line - ('ampersand_sep'). - - Note: Don't do any whitespace formatting on ampersands if next line starts - with an ampersand but remember the original number of spaces - (ampersand_sep). This "special rule" is necessary since ampersands starting - a line can be used to break literals, so changing the number of whitespaces - before the ampersand ending the previous line may lead to invalid syntax or - may change the number of whitespace characters in a string. - """ - pre_ampersand = [] - ampersand_sep = [] - - for pos, line in enumerate(lines): - match = re.search(SOL_STR + r'(&\s*)', line) - if match: - pre_ampersand.append(match.group(1)) - # amount of whitespace before ampersand of previous line: - m = re.search(r'(\s*)&[\s]*(?:!.*)?$', lines[pos - 1]) - if not m: - raise FprettifyParseException( - "Bad continuation line format", filename, line_nr) - sep = len(m.group(1)) - - ampersand_sep.append(sep) - else: - pre_ampersand.append('') - if pos > 0: - # use default 1 whitespace character before ampersand - ampersand_sep.append(1) - - lines = [l.strip(' ').strip('&') if not s else l for l, s in zip(lines, is_special)] - return [lines, pre_ampersand, ampersand_sep] - - -def get_manual_alignment(lines): - """extract manual indents for line continuations from line""" - manual_lines_indent = [ - len(l) - len(l.lstrip(' ').lstrip('&')) for l in lines] - manual_lines_indent = [ind - manual_lines_indent[0] - for ind in manual_lines_indent] - return manual_lines_indent - - -def write_formatted_line(outfile, indent, lines, orig_lines, indent_special, llength, use_same_line, is_omp_conditional, label, filename, line_nr): - """Write reformatted line to file""" - - for ind, line, orig_line in zip(indent, lines, orig_lines): - - # get actual line length excluding comment: - line_length = 0 - for line_length, _ in CharFilter(line): - pass - line_length += 1 - - if indent_special != 1: - ind_use = ind - else: - if use_same_line: - ind_use = 1 - else: - ind_use = 0 - - if CPP_RE.search(line.lstrip()): - ind_use = 0 - - if label: - label_use = label - label = '' # no label for continuation lines - else: - label_use = '' - - if ind_use + line_length <= (llength+1): # llength (default 132) plus 1 newline char - outfile.write('!$ ' * is_omp_conditional + label_use + - ' ' * (ind_use - 3 * is_omp_conditional - len(label_use) + - len(line) - len(line.lstrip(' '))) + - line.lstrip(' ')) - elif line_length <= (llength+1): - outfile.write('!$ ' * is_omp_conditional + label_use + ' ' * - ((llength+1) - 3 * is_omp_conditional - len(label_use) - - len(line.lstrip(' '))) + line.lstrip(' ')) - - log_message(LINESPLIT_MESSAGE+" (limit: "+str(llength)+")", "warning", - filename, line_nr) - else: - outfile.write(orig_line) - log_message(LINESPLIT_MESSAGE+" (limit: "+str(llength)+")", "warning", - filename, line_nr) - - -def get_curr_delim(line, pos): - """get delimiter token in line starting at pos, if it exists""" - what_del_open = DEL_OPEN_RE.search(line[pos:pos + 2]) - what_del_close = DEL_CLOSE_RE.search(line[pos:pos + 2]) - return [what_del_open, what_del_close] - - -def set_fprettify_logger(level): - """setup custom logger""" - logger = logging.getLogger('fprettify-logger') - logger.setLevel(level) - stream_handler = logging.StreamHandler() - stream_handler.setLevel(level) - formatter = logging.Formatter( - '%(levelname)s: File %(ffilename)s, line %(fline)s\n %(message)s') - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) - - -def log_exception(e, message): - """log an exception and a message""" - log_message(message, "exception", e.filename, e.line_nr) - - -def log_message(message, level, filename, line_nr): - """log a message""" - - logger = logging.getLogger('fprettify-logger') - logger_d = {'ffilename': filename, 'fline': line_nr} - logger_to_use = getattr(logger, level) - logger_to_use(message, extra=logger_d) + sys.stdout.detach(), encoding="UTF-8", line_buffering=True +) def run(argv=sys.argv): # pragma: no cover """Command line interface""" - try: - import configargparse as argparse - except ImportError: - import argparse - - def str2bool(str): - """helper function to convert strings to bool""" - if str.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif str.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - return None - - def get_config_file_list(filename): - """helper function to create list of config files found in parent directories""" - config_file_list = [] - dir = os.path.dirname(filename) - while True: - config_file = os.path.join(dir, '.fprettify.rc') - if os.path.isfile(config_file): - config_file_list.insert(0, config_file) - parent=os.path.dirname(dir) - if parent == dir: - break - dir = parent - return config_file_list - - arguments = {'prog': argv[0], - 'description': 'Auto-format modern Fortran source files.', - 'formatter_class': argparse.ArgumentDefaultsHelpFormatter} - - if argparse.__name__ == "configargparse": - arguments['args_for_setting_config_path'] = ['-c', '--config-file'] - arguments['description'] = arguments['description'] + " Config files ('.fprettify.rc') in the home (~) directory and any such files located in parent directories of the input file will be used. When the standard input is used, the search is started from the current directory." - - def get_arg_parser(args): - """helper function to create the parser object""" - parser = argparse.ArgumentParser(**args) - - parser.add_argument("-i", "--indent", type=int, default=3, - help="relative indentation width") - parser.add_argument("-l", "--line-length", type=int, default=132, - help="column after which a line should end, viz. -ffree-line-length-n for GCC") - parser.add_argument("-w", "--whitespace", type=int, - choices=range(0, 5), default=2, help="Presets for the amount of whitespace - " - " 0: minimal whitespace" - " | 1: operators (except arithmetic), print/read" - " | 2: operators, print/read, plus/minus" - " | 3: operators, print/read, plus/minus, muliply/divide" - " | 4: operators, print/read, plus/minus, muliply/divide, type component selector") - parser.add_argument("--whitespace-comma", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for comma/semicolons") - parser.add_argument("--whitespace-assignment", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for assignments") - parser.add_argument("--whitespace-decl", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for declarations (requires '--enable-decl')") - parser.add_argument("--whitespace-relational", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for relational operators") - parser.add_argument("--whitespace-logical", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for logical operators") - parser.add_argument("--whitespace-plusminus", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for plus/minus arithmetic") - parser.add_argument("--whitespace-multdiv", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for multiply/divide arithmetic") - parser.add_argument("--whitespace-print", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for print/read statements") - parser.add_argument("--whitespace-type", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for select type components") - parser.add_argument("--whitespace-intrinsics", type=str2bool, nargs="?", default="None", const=True, - help="boolean, en-/disable whitespace for intrinsics like if/write/close") - parser.add_argument("--strict-indent", action='store_true', default=False, help="strictly impose indentation even for nested loops") - parser.add_argument("--enable-decl", action="store_true", default=False, help="enable whitespace formatting of declarations ('::' operator).") - parser.add_argument("--disable-indent", action='store_true', default=False, help="don't impose indentation") - parser.add_argument("--disable-whitespace", action='store_true', default=False, help="don't impose whitespace formatting") - parser.add_argument("--enable-replacements", action='store_true', default=False, help="replace relational operators (e.g. '.lt.' <--> '<')") - parser.add_argument("--c-relations", action='store_true', default=False, help="C-style relational operators ('<', '<=', ...)") - parser.add_argument("--case", nargs=4, default=[0,0,0,0], type=int, help="Enable letter case formatting of intrinsics by specifying which of " - "keywords, procedures/modules, operators and constants (in this order) should be lowercased or uppercased - " - " 0: do nothing" - " | 1: lowercase" - " | 2: uppercase") - - parser.add_argument("--strip-comments", action='store_true', default=False, help="strip whitespaces before comments") - parser.add_argument('--disable-fypp', action='store_true', default=False, - help="Disables the indentation of fypp preprocessor blocks.") - parser.add_argument('--disable-indent-mod', action='store_true', default=False, - help="Disables the indentation after module / program.") - - parser.add_argument("-d","--diff", action='store_true', default=False, - help="Write file differences to stdout instead of formatting inplace") - parser.add_argument("-s", "--stdout", action='store_true', default=False, - help="Write to stdout instead of formatting inplace") - - group = parser.add_mutually_exclusive_group() - group.add_argument("-S", "--silent", "--no-report-errors", action='store_true', - default=False, help="Don't write any errors or warnings to stderr") - group.add_argument("-D", "--debug", action='store_true', - default=False, help=argparse.SUPPRESS) - parser.add_argument("path", type=str, nargs='*', - help="Paths to files to be formatted inplace. If no paths are given, stdin (-) is used by default. Path can be a directory if --recursive is used.", default=['-']) - parser.add_argument('-r', '--recursive', action='store_true', - default=False, help="Recursively auto-format all Fortran files in subdirectories of specified path; recognized filename extensions: {}". format(", ".join(FORTRAN_EXTENSIONS))) - parser.add_argument('-e', '--exclude', action='append', - default=[], type=str, - help="File or directory patterns to be excluded when searching for Fortran files to format") - parser.add_argument('-f', '--fortran', type=str, action='append', default=[], - help="Overrides default fortran extensions recognized by --recursive. Repeat this option to specify more than one extension.") - parser.add_argument('--version', action='version', - version='%(prog)s 0.3.7') - return parser - - parser = get_arg_parser(arguments) - + parser_args = get_parser_args() + parser = get_arg_parser(parser_args) args = parser.parse_args(argv[1:]) - def build_ws_dict(args): - """helper function to build whitespace dictionary""" - ws_dict = {} - ws_dict['comma'] = args.whitespace_comma - ws_dict['assignments'] = args.whitespace_assignment - ws_dict['decl'] = args.whitespace_decl - ws_dict['relational'] = args.whitespace_relational - ws_dict['logical'] = args.whitespace_logical - ws_dict['plusminus'] = args.whitespace_plusminus - ws_dict['multdiv'] = args.whitespace_multdiv - ws_dict['print'] = args.whitespace_print - ws_dict['type'] = args.whitespace_type - ws_dict['intrinsics'] = args.whitespace_intrinsics - return ws_dict - # support legacy input: - if 'stdin' in args.path and not os.path.isfile('stdin'): - args.path = ['-' if _ == 'stdin' else _ for _ in args.path] + if "stdin" in args.path and not os.path.isfile("stdin"): + args.path = ["-" if _ == "stdin" else _ for _ in args.path] for directory in args.path: - if directory == '-': + if directory == "-": if args.recursive: sys.stderr.write("--recursive requires a directory.\n") sys.exit(1) else: if not os.path.exists(directory): - sys.stderr.write("directory " + directory + - " does not exist!\n") + sys.stderr.write("directory " + directory + " does not exist!\n") sys.exit(1) - if not os.path.isfile(directory) and directory != '-' and not args.recursive: + if ( + not os.path.isfile(directory) + and directory != "-" + and not args.recursive + ): sys.stderr.write("file " + directory + " does not exist!\n") sys.exit(1) @@ -2075,40 +127,56 @@ def build_ws_dict(args): from fnmatch import fnmatch - for dirpath, dirnames, files in os.walk(directory,topdown=True): + for dirpath, dirnames, files in os.walk(directory, topdown=True): # Prune excluded patterns from list of child directories - dirnames[:] = [dirname for dirname in dirnames if not any( - [fnmatch(dirname,exclude_pattern) or fnmatch(os.path.join(dirpath,dirname),exclude_pattern) - for exclude_pattern in args.exclude] - )] - - for ffile in [os.path.join(dirpath, f) for f in files - if any(f.endswith(_) for _ in ext) - and not any([ - fnmatch(f,exclude_pattern) - for exclude_pattern in args.exclude])]: + dirnames[:] = [ + dirname + for dirname in dirnames + if not any( + [ + fnmatch(dirname, exclude_pattern) + or fnmatch(os.path.join(dirpath, dirname), exclude_pattern) + for exclude_pattern in args.exclude + ] + ) + ] + + for ffile in [ + os.path.join(dirpath, f) + for f in files + if any(f.endswith(_) for _ in ext) + and not any( + [ + fnmatch(f, exclude_pattern) + for exclude_pattern in args.exclude + ] + ) + ]: filenames.append(ffile) for filename in filenames: # reparse arguments using the file's list of config files - filearguments = arguments - if argparse.__name__ == "configargparse": - filearguments['default_config_files'] = ['~/.fprettify.rc'] + get_config_file_list(os.path.abspath(filename) if filename != '-' else os.getcwd()) + filearguments = parser_args + filearguments["default_config_files"] = ["~/.fprettify.rc"] + list( + get_config_files( + os.path.abspath(filename) if filename != "-" else os.getcwd() + ) + ) file_argparser = get_arg_parser(filearguments) file_args = file_argparser.parse_args(argv[1:]) ws_dict = build_ws_dict(file_args) case_dict = { - 'keywords' : file_args.case[0], - 'procedures' : file_args.case[1], - 'operators' : file_args.case[2], - 'constants' : file_args.case[3] - } + "keywords": file_args.case[0], + "procedures": file_args.case[1], + "operators": file_args.case[2], + "constants": file_args.case[3], + } - stdout = file_args.stdout or directory == '-' - diffonly=file_args.diff + stdout = file_args.stdout or directory == "-" + diffonly = file_args.diff if file_args.debug: level = logging.DEBUG @@ -2117,26 +185,30 @@ def build_ws_dict(args): else: level = logging.WARNING - set_fprettify_logger(level) + set_logger(level) try: - reformat_inplace(filename, - stdout=stdout, - diffonly=diffonly, - impose_indent=not file_args.disable_indent, - indent_size=file_args.indent, - strict_indent=file_args.strict_indent, - impose_whitespace=not file_args.disable_whitespace, - impose_replacements=file_args.enable_replacements, - cstyle=file_args.c_relations, - case_dict=case_dict, - whitespace=file_args.whitespace, - whitespace_dict=ws_dict, - llength=1024 if file_args.line_length == 0 else file_args.line_length, - strip_comments=file_args.strip_comments, - format_decl=file_args.enable_decl, - indent_fypp=not file_args.disable_fypp, - indent_mod=not file_args.disable_indent_mod) + reformat_inplace( + filename, + stdout=stdout, + diffonly=diffonly, + impose_indent=not file_args.disable_indent, + indent_size=file_args.indent, + strict_indent=file_args.strict_indent, + impose_whitespace=not file_args.disable_whitespace, + impose_replacements=file_args.enable_replacements, + cstyle=file_args.c_relations, + case_dict=case_dict, + whitespace=file_args.whitespace, + whitespace_dict=ws_dict, + llength=( + 1024 if file_args.line_length == 0 else file_args.line_length + ), + strip_comments=file_args.strip_comments, + format_decl=file_args.enable_decl, + indent_fypp=not file_args.disable_fypp, + indent_mod=not file_args.disable_indent_mod, + ) except FprettifyException as e: log_exception(e, "Fatal error occured") sys.exit(1) diff --git a/fprettify/aligner.py b/fprettify/aligner.py new file mode 100644 index 0000000..7e4b27a --- /dev/null +++ b/fprettify/aligner.py @@ -0,0 +1,180 @@ +from fprettify.exceptions import FprettifyInternalException +from fprettify.parser import * +from fprettify.utils import log_message + + +class F90Aligner: + """ + Alignment of continuations of a broken line, + based on the following heuristics: + + if line break in brackets + We are parsing the level of nesting + and align to most inner bracket delimiter. + + else if line is an assignment + alignment to '=' or '=>'. + note: assignment operator recognized as any '=' that is not + part of another operator and that is not enclosed in bracket + + else if line is a declaration + alignment to '::' + + else + default indent + """ + + def __init__(self, filename): + self._filename = filename + self.__init_line(0) + + def __init_line(self, line_nr): + """initialization before processing new line""" + self._line_nr = line_nr + self._line_indents = [0] + self._level = 0 + self._br_indent_list = [0] + + def process_lines_of_fline(self, f_line, lines, rel_ind, line_nr): + """ + process all lines that belong to a Fortran line `f_line`, + `rel_ind` is the relative indentation size. + """ + + self.__init_line(line_nr) + + is_decl = ( + VAR_DECL_RE.search(f_line) + or PUBLIC_RE.search(f_line) + or PRIVATE_RE.match(f_line) + ) + is_use = USE_RE.search(f_line) + for pos, line in enumerate(lines): + self.__align_line_continuations( + line, is_decl, is_use, rel_ind, self._line_nr + pos + ) + if pos + 1 < len(lines): + self._line_indents.append(self._br_indent_list[-1]) + + if len(self._br_indent_list) > 2 or self._level: + log_message( + "unpaired bracket delimiters", "info", self._filename, self._line_nr + ) + + def get_lines_indent(self): + """after processing, retrieve the indents of all line parts.""" + return self._line_indents + + def __align_line_continuations(self, line, is_decl, is_use, indent_size, line_nr): + """align continuation lines.""" + + indent_list = self._br_indent_list + level = self._level + filename = self._filename + + pos_eq = 0 + pos_ldelim = [] + pos_rdelim = [] + ldelim = [] + rdelim = [] + + # find delimiters that are not ended on this line. + # find proper alignment to most inner delimiter + # or alignment to assignment operator + rel_ind = indent_list[-1] # indentation of prev. line + + end_of_delim = -1 + + for pos, char in CharFilter(line): + + what_del_open = None + what_del_close = None + if pos > end_of_delim: + [what_del_open, what_del_close] = get_curr_delim(line, pos) + + if what_del_open: + what_del_open = what_del_open.group() + end_of_delim = pos + len(what_del_open) - 1 + level += 1 + indent_list.append(pos + len(what_del_open) + rel_ind) + pos_ldelim.append(pos) + ldelim.append(what_del_open) + if what_del_close: + what_del_close = what_del_close.group() + end_of_delim = pos + len(what_del_close) - 1 + if level > 0: + level += -1 + indent_list.pop() + else: + log_message( + "unpaired bracket delimiters", "info", filename, line_nr + ) + + if pos_ldelim: + pos_ldelim.pop() + what_del_open = ldelim.pop() + valid = False + if what_del_open == r"(": + valid = what_del_close == r")" + if what_del_open == r"(/": + valid = what_del_close == r"/)" + if what_del_open == r"[": + valid = what_del_close == r"]" + if not valid: + log_message( + "unpaired bracket delimiters", "info", filename, line_nr + ) + + else: + pos_rdelim.append(pos) + rdelim.append(what_del_close) + if char == "," and not level and pos_eq > 0: + # a top level comma removes previous alignment position. + # (see issue #11) + pos_eq = 0 + indent_list.pop() + if ( + not level + and not is_decl + and char == "=" + and not REL_OP_RE.search( + line[max(0, pos - 1) : min(pos + 2, len(line))] + ) + ): + # should only have one assignment per line! + if pos_eq > 0: + raise FprettifyInternalException( + "found more than one assignment in the same Fortran line", + filename, + line_nr, + ) + is_pointer = line[pos + 1] == ">" + pos_eq = pos + 1 + # don't align if assignment operator directly before + # line break + if not re.search(r"=>?\s*" + LINEBREAK_STR, line, RE_FLAGS): + indent_list.append(pos_eq + 1 + is_pointer + indent_list[-1]) + elif ( + is_decl + and line[pos : pos + 2] == "::" + and not re.search(r"::\s*" + LINEBREAK_STR, line, RE_FLAGS) + ): + indent_list.append(pos + 3 + indent_list[-1]) + elif ( + is_use + and line[pos] == ":" + and not re.search(r":\s*" + LINEBREAK_STR, line, RE_FLAGS) + ): + indent_list.append(pos + 2 + indent_list[-1]) + + # Don't align if delimiter opening directly before line break + if level and re.search(DEL_OPEN_STR + r"\s*" + LINEBREAK_STR, line, RE_FLAGS): + if len(indent_list) > 1: + indent_list[-1] = indent_list[-2] + else: + indent_list[-1] = 0 + + if not indent_list[-1]: + indent_list[-1] = indent_size + + self._level = level diff --git a/fprettify/constants.py b/fprettify/constants.py new file mode 100644 index 0000000..6152956 --- /dev/null +++ b/fprettify/constants.py @@ -0,0 +1,19 @@ +"""Miscellaneous constants""" + +FORTRAN_EXTENSIONS = [".f", ".for", ".ftn", ".f90", ".f95", ".f03", ".fpp"] +FORTRAN_EXTENSIONS += [_.upper() for _ in FORTRAN_EXTENSIONS] + +FORMATTER_ERROR_MESSAGE = ( + " Wrong usage of formatting-specific directives" " '&', '!&', '!&<' or '!&>'." +) +LINESPLIT_MESSAGE = ( + "auto indentation failed due to chars limit, " "line should be split" +) + +# intrinsic statements with parenthesis notation that are not functions +INTR_STMTS_PAR = ( + r"(ALLOCATE|DEALLOCATE|" + r"OPEN|CLOSE|READ|WRITE|" + r"FLUSH|ENDFILE|REWIND|BACKSPACE|INQUIRE|" + r"FORALL|WHERE|ASSOCIATE|NULLIFY)" +) diff --git a/fprettify/exceptions.py b/fprettify/exceptions.py new file mode 100644 index 0000000..0aafd05 --- /dev/null +++ b/fprettify/exceptions.py @@ -0,0 +1,22 @@ +"""Custom exception types""" + + +class FprettifyException(Exception): + """Base class for all custom exceptions""" + + def __init__(self, msg, filename, line_nr): + super(FprettifyException, self).__init__(msg) + self.filename = filename + self.line_nr = line_nr + + +class FprettifyParseException(FprettifyException): + """Exception for unparseable Fortran code (user's fault).""" + + pass + + +class FprettifyInternalException(FprettifyException): + """Exception for potential internal errors (fixme's).""" + + pass diff --git a/fprettify/formatter.py b/fprettify/formatter.py new file mode 100644 index 0000000..d790982 --- /dev/null +++ b/fprettify/formatter.py @@ -0,0 +1,1140 @@ +import io +import sys + +from fprettify.constants import * +from fprettify.exceptions import FprettifyInternalException +from fprettify.indenter import F90Indenter +from fprettify.parser import * +from fprettify.preprocessor import preprocess_labels, preprocess_line, preprocess_omp +from fprettify.utils import diff, log_message + + +def inspect_ffile_format( + infile, indent_size, strict_indent, indent_fypp=False, orig_filename=None +): + """ + Determine indentation by inspecting original Fortran file. + + This is mainly for finding aligned blocks of DO/IF statements. + Also check if it has f77 constructs. + :param infile: open file + :param indent_size: the default indent size + :orig_filename: filename used for messages + :returns: [ target indent sizes for each line, + indent of first line (offset) ] + """ + if not orig_filename: + orig_filename = infile.name + + indents = [] + stream = InputStream( + infile, filter_fypp=not indent_fypp, orig_filename=orig_filename + ) + prev_offset = 0 + first_indent = -1 + has_fypp = False + + while 1: + f_line, _, lines = stream.next_fortran_line() + if not lines: + break + + if FYPP_LINE_RE.search(f_line): + has_fypp = True + + f_line, lines, _ = preprocess_labels(f_line, lines) + + offset = len(lines[0]) - len(lines[0].lstrip(" ")) + if f_line.strip() and first_indent == -1: + first_indent = offset + indents.append(offset - prev_offset) + + # don't impose indentation for blocked do/if constructs: + if IF_RE.search(f_line) or DO_RE.search(f_line): + if prev_offset != offset or strict_indent: + indents[-1] = indent_size + else: + indents[-1] = indent_size + + prev_offset = offset + + return indents, first_indent, has_fypp + + +def replace_relational_single_fline(f_line, cstyle): + """ + format a single Fortran line - replaces scalar relational + operators in logical expressions to either Fortran or C-style. + .lt. <--> < + .le. <--> <= + .gt. <--> > + .ge. <--> >= + .eq. <--> == + .ne. <--> /= + """ + + new_line = f_line + + # only act on lines that do contain a relation + if REL_OP_RE.search(f_line): + # check that relation is not inside quotes, a string, or commented + # (think of underlining a heading with === or things like markup being printed which we do not replace) + pos_prev = -1 + pos = -1 + line_parts = [""] + for pos, char in CharFilter(f_line): + if pos > pos_prev + 1: # skipped string + line_parts.append(f_line[pos_prev + 1 : pos].strip()) # append string + line_parts.append("") + + line_parts[-1] += char + + pos_prev = pos + + if pos + 1 < len(f_line): + line_parts.append(f_line[pos + 1 :]) + + for pos, part in enumerate(line_parts): + # exclude comments, strings: + if not STR_OPEN_RE.match(part): + # also exclude / if we see a namelist and data statement + if cstyle: + part = re.sub(r"\.LT\.", "< ", part, flags=RE_FLAGS) + part = re.sub(r"\.LE\.", "<= ", part, flags=RE_FLAGS) + part = re.sub(r"\.GT\.", "> ", part, flags=RE_FLAGS) + part = re.sub(r"\.GE\.", ">= ", part, flags=RE_FLAGS) + part = re.sub(r"\.EQ\.", "== ", part, flags=RE_FLAGS) + part = re.sub(r"\.NE\.", "/= ", part, flags=RE_FLAGS) + else: + part = re.sub(r"<=", ".le.", part, flags=RE_FLAGS) + part = re.sub(r"<", ".lt.", part, flags=RE_FLAGS) + part = re.sub(r">=", ".ge.", part, flags=RE_FLAGS) + part = re.sub(r">", ".gt.", part, flags=RE_FLAGS) + part = re.sub(r"==", ".eq.", part, flags=RE_FLAGS) + part = re.sub(r"\/=", ".ne.", part, flags=RE_FLAGS) + + line_parts[pos] = part + + new_line = "".join(line_parts) + + return new_line + + +def replace_keywords_single_fline(f_line, case_dict): + """ + format a single Fortran line - change case of keywords + """ + + new_line = f_line + + # Collect words list + pos_prev = -1 + pos = -1 + line_parts = [""] + for pos, char in CharFilter(f_line): + if pos > pos_prev + 1: # skipped string + line_parts.append(f_line[pos_prev + 1 : pos].strip()) # append string + line_parts.append("") + + line_parts[-1] += char + + pos_prev = pos + + if pos + 1 < len(f_line): + line_parts.append(f_line[pos + 1 :]) + + line_parts = [ + [a] if STR_OPEN_RE.match(a) else re.split(F90_OPERATORS_RE, a) + for a in line_parts + ] # problem, split "." + line_parts = [b for a in line_parts for b in a] + + ## line_parts = [[a] if STR_OPEN_RE.match(a) else re.split('(\W)',a) + ## for a in line_parts] # problem, split "." + line_parts = [ + [a] if STR_OPEN_RE.match(a) else re.split("([^a-zA-Z0-9_.])", a) + for a in line_parts + ] + line_parts = [b for a in line_parts for b in a] + + swapcase = lambda s, a: s if a == 0 else (s.lower() if a == 1 else s.upper()) + + nbparts = len(line_parts) + for pos, part in enumerate(line_parts): + # exclude comments, strings: + if part.strip() and not STR_OPEN_RE.match(part): + if F90_KEYWORDS_RE.match(part): + part = swapcase(part, case_dict["keywords"]) + elif F90_MODULES_RE.match(part): + part = swapcase(part, case_dict["procedures"]) + elif F90_PROCEDURES_RE.match(part): + ok = False + for pos2 in range(pos + 1, nbparts): + part2 = line_parts[pos2] + if part2.strip() and not ( + part2 == "\n" or STR_OPEN_RE.match(part2) + ): + ok = part2 == "(" + break + if ok: + part = swapcase(part, case_dict["procedures"]) + elif F90_OPERATORS_RE.match(part): + part = swapcase(part, case_dict["operators"]) + elif F90_CONSTANTS_RE.match(part): + part = swapcase(part, case_dict["constants"]) + elif F90_CONSTANTS_TYPES_RE.match(part): + part = swapcase(part, case_dict["constants"]) + elif F90_NUMBER_ALL_REC.match(part): + part = swapcase(part, case_dict["constants"]) + + line_parts[pos] = part + + new_line = "".join(line_parts) + + return new_line + + +def format_single_fline( + f_line, + whitespace, + whitespace_dict, + linebreak_pos, + ampersand_sep, + scope_parser, + format_decl, + filename, + line_nr, + auto_format=True, +): + """ + format a single Fortran line - imposes white space formatting + and inserts linebreaks. + Takes a logical Fortran line `f_line` as input as well as the positions + of the linebreaks (`linebreak_pos`), and the number of + separating whitespace characters before ampersand (`ampersand_sep`). + `filename` and `line_nr` just for error messages. + The higher `whitespace`, the more white space characters inserted - + whitespace = 0, 1, 2, 3 are currently supported. + whitespace formatting can additionally controlled more fine-grained + via a dictionary of bools (whitespace_dict) + auto formatting can be turned off by setting `auto_format` to False. + """ + + # define whether to put whitespaces around operators: + mapping = { + "comma": 0, # 0: comma, semicolon + "assignments": 1, # 1: assignment operators + "relational": 2, # 2: relational operators + "logical": 3, # 3: logical operators + "plusminus": 4, # 4: arithm. operators plus and minus + "multdiv": 5, # 5: arithm. operators multiply and divide + "print": 6, # 6: print / read statements + "type": 7, # 7: select type components + "intrinsics": 8, # 8: intrinsics + "decl": 9, # 9: declarations + } + + if whitespace == 0: + spacey = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + elif whitespace == 1: + spacey = [1, 1, 1, 1, 0, 0, 1, 0, 1, 1] + elif whitespace == 2: + spacey = [1, 1, 1, 1, 1, 0, 1, 0, 1, 1] + elif whitespace == 3: + spacey = [1, 1, 1, 1, 1, 1, 1, 0, 1, 1] + elif whitespace == 4: + spacey = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + else: + raise NotImplementedError("unknown value for whitespace") + + if whitespace_dict: + # iterate over dictionary and override settings for 'spacey' + for key, value in mapping.items(): + if whitespace_dict[key] == True: + spacey[value] = 1 + elif whitespace_dict[key] == False: + spacey[value] = 0 + + line = f_line + line_orig = line + + if auto_format: + + line = rm_extra_whitespace(line, format_decl) + line = add_whitespace_charwise( + line, spacey, scope_parser, format_decl, filename, line_nr + ) + line = add_whitespace_context(line, spacey) + + lines_out = split_reformatted_line( + line_orig, linebreak_pos, ampersand_sep, line, filename, line_nr + ) + return lines_out + + +def rm_extra_whitespace(line, format_decl): + """rm all unneeded whitespace chars, except for declarations""" + line_ftd = "" + pos_prev = -1 + pos = -1 + for pos, char in CharFilter(line): + if format_decl: + is_decl = False + else: + is_decl = line[pos:].lstrip().startswith("::") or line[ + :pos + ].rstrip().endswith("::") + + if pos > pos_prev + 1: # skipped string + line_ftd = line_ftd + line[pos_prev + 1 : pos] + + if char == " ": + # remove double spaces: + if line_ftd and (re.search(r"[\w]", line_ftd[-1]) or is_decl): + line_ftd = line_ftd + char + else: + if ( + line_ftd + and line_ftd[-1] == " " + and (not re.search(r"[\w]", char) and not is_decl) + ): + line_ftd = line_ftd[:-1] # remove spaces except between words + line_ftd = line_ftd + char + pos_prev = pos + + line_ftd = line_ftd + line[pos + 1 :] + return line_ftd + + +def add_whitespace_charwise(line, spacey, scope_parser, format_decl, filename, line_nr): + """add whitespace character wise (no need for context aware parsing)""" + line_ftd = line + pos_eq = [] + end_of_delim = -1 + level = 0 + for pos, char in CharFilter(line): + # offset w.r.t. unformatted line + offset = len(line_ftd) - len(line) + + # format delimiters + what_del_open = None + what_del_close = None + if pos > end_of_delim: + [what_del_open, what_del_close] = get_curr_delim(line, pos) + + if what_del_open or what_del_close: + sep1 = 0 + sep2 = 0 + + if what_del_open: + delim = what_del_open.group() + else: + delim = what_del_close.group() + + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + len(delim) + offset :] + + # format opening delimiters + if what_del_open: + level += 1 # new scope + # add separating whitespace before opening delimiter + # with some exceptions: + # FIXME: duplication of regex, better to include them into + # INTR_STMTS_PAR + if ( + ( + not re.search( + (r"(" + DEL_OPEN_STR + r"|[\w\*/=\+\-:])\s*$"), + line[:pos], + RE_FLAGS, + ) + and not EMPTY_RE.search(line[:pos]) + ) + or re.search( + SOL_STR + r"(\w+\s*:)?(ELSE)?\s*IF\s*$", line[:pos], RE_FLAGS + ) + or re.search( + SOL_STR + r"(\w+\s*:)?\s*DO\s+WHILE\s*$", line[:pos], RE_FLAGS + ) + or re.search( + SOL_STR + r"(SELECT)?\s*CASE\s*$", line[:pos], RE_FLAGS + ) + or re.search( + SOL_STR + r"(SELECT)?\s*RANK\s*$", line[:pos], RE_FLAGS + ) + or re.search(SOL_STR + r"SELECT\s*TYPE\s*$", line[:pos], RE_FLAGS) + or re.search(SOL_STR + r"CLASS\s*DEFAULT\s*$", line[:pos], RE_FLAGS) + or re.search( + SOL_STR + r"(TYPE|CLASS)\s+IS\s*$", line[:pos], RE_FLAGS + ) + or re.search( + r"(? 0: + level += -1 # close scope + else: + log_message( + "unpaired bracket delimiters", "info", filename, line_nr + ) + + # add separating whitespace after closing delimiter + # with some exceptions: + if not re.search( + r"^\s*(" + DEL_CLOSE_STR + r"|[,%:/\*])", line[pos + 1 :], RE_FLAGS + ): + sep2 = 1 + elif re.search(r"^\s*::", line[pos + 1 :], RE_FLAGS): + sep2 = len(rhs) - len(rhs.lstrip(" ")) if not format_decl else 1 + + # where delimiter token ends + end_of_delim = pos + len(delim) - 1 + + line_ftd = ( + lhs.rstrip(" ") + " " * sep1 + delim + " " * sep2 + rhs.lstrip(" ") + ) + + # format commas and semicolons + if char in [",", ";"]: + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + 1 + offset :] + line_ftd = lhs.rstrip(" ") + char + " " * spacey[0] + rhs.lstrip(" ") + line_ftd = line_ftd.rstrip(" ") + + # format type selector % + if char == "%": + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + 1 + offset :] + line_ftd = ( + lhs.rstrip(" ") + + " " * spacey[7] + + char + + " " * spacey[7] + + rhs.lstrip(" ") + ) + line_ftd = line_ftd.rstrip(" ") + + # format '::' + if format_decl and line[pos : pos + 2] == "::": + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + 2 + offset :] + line_ftd = ( + lhs.rstrip(" ") + + " " * spacey[9] + + "::" + + " " * spacey[9] + + rhs.lstrip(" ") + ) + line_ftd = line_ftd.rstrip(" ") + + # format .NOT. + if re.search(r"^\.NOT\.", line[pos : pos + 5], RE_FLAGS): + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + 5 + offset :] + line_ftd = ( + lhs.rstrip(" ") + + line[pos : pos + 5] + + " " * spacey[3] + + rhs.lstrip(" ") + ) + + # strip whitespaces from '=' and prepare assignment operator + # formatting: + if char == "=" and not REL_OP_RE.search(line[pos - 1 : pos + 2]): + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + 1 + offset :] + line_ftd = lhs.rstrip(" ") + "=" + rhs.lstrip(" ") + is_pointer = line[pos + 1] == ">" + if (not level) or is_pointer: # remember position of assignment operator + pos_eq.append(len(lhs.rstrip(" "))) + + line = line_ftd + + for pos in pos_eq: + offset = len(line_ftd) - len(line) + is_pointer = line[pos + 1] == ">" + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + 1 + is_pointer + offset :] + if is_pointer: + assign_op = "=>" # pointer assignment + else: + assign_op = "=" # assignment + line_ftd = ( + lhs.rstrip(" ") + + " " * spacey[1] + + assign_op + + " " * spacey[1] + + rhs.lstrip(" ") + ) + # offset w.r.t. unformatted line + + is_end = False + if END_RE.search(line_ftd): + for endre in scope_parser["end"]: + if endre and endre.search(line_ftd): + is_end = True + if is_end: + line_ftd = END_RE.sub(r"\1" + " " * spacey[8] + r"\2", line_ftd) + + if level != 0: + log_message("unpaired bracket delimiters", "info", filename, line_nr) + + return line_ftd + + +def add_whitespace_context(line, spacey): + """ + for context aware whitespace formatting we extract line parts that are + not comments or strings in order to be able to apply a context aware regex. + """ + + pos_prev = -1 + pos = -1 + line_parts = [""] + for pos, char in CharFilter(line): + if pos > pos_prev + 1: # skipped string + line_parts.append(line[pos_prev + 1 : pos].strip()) # append string + line_parts.append("") + + line_parts[-1] += char + + pos_prev = pos + + if pos + 1 < len(line): + line_parts.append(line[pos + 1 :]) + + # format namelists with spaces around / + if NML_STMT_RE.match(line): + for pos, part in enumerate(line_parts): + # exclude comments, strings: + if not STR_OPEN_RE.match(part): + partsplit = NML_RE.split(part) + line_parts[pos] = " ".join(partsplit) + + # Two-sided operators + for n_op, lr_re in enumerate(LR_OPS_RE): + for pos, part in enumerate(line_parts): + # exclude comments, strings: + if not STR_OPEN_RE.match(part): + # also exclude / if we see a namelist and data statement + if not (NML_STMT_RE.match(line) or DATA_STMT_RE.match(line)): + partsplit = lr_re.split(part) + line_parts[pos] = (" " * spacey[n_op + 2]).join(partsplit) + + line = "".join(line_parts) + + for newre in [IF_RE, DO_RE, BLK_RE]: + if newre.search(line) and re.search(SOL_STR + r"\w+\s*:", line): + line = ": ".join(_.strip() for _ in line.split(":", 1)) + + # format ':' for labels and use only statements + if USE_RE.search(line): + line = re.sub( + r"(only)\s*:\s*", r"\g<1>:" + " " * spacey[0], line, flags=RE_FLAGS + ) + + return line + + +def split_reformatted_line( + line_orig, linebreak_pos_orig, ampersand_sep, line, filename, line_nr +): + """ + Infer linebreak positions of formatted line from linebreak positions in + original line and split line. + """ + # shift line break positions from original to reformatted line + pos_new = 0 + pos_old = 0 + linebreak_pos_orig.sort(reverse=True) + linebreak_pos_ftd = [] + while 1: + + if pos_new == len(line) or pos_old == len(line_orig): + break + + if line[pos_new] != line_orig[pos_old]: + raise FprettifyInternalException( + "failed at finding line break position", filename, line_nr + ) + + if linebreak_pos_orig and pos_old > linebreak_pos_orig[-1]: + linebreak_pos_orig.pop() + linebreak_pos_ftd.append(pos_new) + continue + + pos_new += 1 + while pos_new < len(line) and line[pos_new] == " ": + pos_new += 1 + + pos_old += 1 + while pos_old < len(line_orig) and line_orig[pos_old] == " ": + pos_old += 1 + + linebreak_pos_ftd.insert(0, 0) + + # We split line into parts and we insert ampersands at line end, but not + # for empty lines and comment lines + lines_split = [ + (line[l:r].rstrip(" ") + " " * ampersand_sep[pos] + "&" * min(1, r - l)) + for pos, (l, r) in enumerate( + zip(linebreak_pos_ftd[0:-1], linebreak_pos_ftd[1:]) + ) + ] + + lines_split.append(line[linebreak_pos_ftd[-1] :]) + + return lines_split + + +def reformat_inplace( + filename, stdout=False, diffonly=False, **kwargs +): # pragma: no cover + """reformat a file in place.""" + if filename == "-": + infile = io.StringIO() + infile.write(sys.stdin.read()) + else: + infile = io.open(filename, "r", encoding="utf-8") + + newfile = io.StringIO() + reformat_ffile(infile, newfile, orig_filename=filename, **kwargs) + + if diffonly: + infile.seek(0) + newfile.seek(0) + diff_contents = diff(infile.read(), newfile.read(), filename, filename) + sys.stdout.write(diff_contents) + else: + + if stdout: + sys.stdout.write(newfile.getvalue()) + else: + outfile = io.open(filename, "r", encoding="utf-8") + + # write to outfile only if content has changed + + import hashlib + + hash_new = hashlib.md5() + hash_new.update(newfile.getvalue().encode("utf-8")) + hash_old = hashlib.md5() + hash_old.update(outfile.read().encode("utf-8")) + + outfile.close() + + if hash_new.digest() != hash_old.digest(): + outfile = io.open(filename, "w", encoding="utf-8") + outfile.write(newfile.getvalue()) + + +def reformat_ffile( + infile, + outfile, + impose_indent=True, + indent_size=3, + strict_indent=False, + impose_whitespace=True, + case_dict={}, + impose_replacements=False, + cstyle=False, + whitespace=2, + whitespace_dict={}, + llength=132, + strip_comments=False, + format_decl=False, + orig_filename=None, + indent_fypp=True, + indent_mod=True, +): + """main method to be invoked for formatting a Fortran file.""" + + # note: whitespace formatting and indentation may require different parsing rules + # (e.g. preprocessor statements may be indented but not whitespace formatted) + # therefore we invoke reformat_ffile independently for: + # 1) whitespace formatting + # 2) indentation + + if not orig_filename: + orig_filename = infile.name + + # 1) whitespace formatting + oldfile = infile + newfile = infile + + if impose_whitespace: + _impose_indent = False + + newfile = io.StringIO() + reformat_ffile_combined( + oldfile, + newfile, + _impose_indent, + indent_size, + strict_indent, + impose_whitespace, + case_dict, + impose_replacements, + cstyle, + whitespace, + whitespace_dict, + llength, + strip_comments, + format_decl, + orig_filename, + indent_fypp, + indent_mod, + ) + oldfile = newfile + + # 2) indentation + if impose_indent: + + _impose_whitespace = False + _impose_replacements = False + + newfile = io.StringIO() + reformat_ffile_combined( + oldfile, + newfile, + impose_indent, + indent_size, + strict_indent, + _impose_whitespace, + case_dict, + _impose_replacements, + cstyle, + whitespace, + whitespace_dict, + llength, + strip_comments, + format_decl, + orig_filename, + indent_fypp, + indent_mod, + ) + + outfile.write(newfile.getvalue()) + + +def reformat_ffile_combined( + infile, + outfile, + impose_indent=True, + indent_size=3, + strict_indent=False, + impose_whitespace=True, + case_dict={}, + impose_replacements=False, + cstyle=False, + whitespace=2, + whitespace_dict={}, + llength=132, + strip_comments=False, + format_decl=False, + orig_filename=None, + indent_fypp=True, + indent_mod=True, +): + + if not orig_filename: + orig_filename = infile.name + + if not impose_indent: + indent_fypp = False + + infile.seek(0) + req_indents, first_indent, has_fypp = inspect_ffile_format( + infile, indent_size, strict_indent, indent_fypp, orig_filename + ) + infile.seek(0) + + if not has_fypp: + indent_fypp = False + + scope_parser = build_scope_parser(fypp=indent_fypp, mod=indent_mod) + + # initialization + + # special cases for indentation: + # indent_special = 0: parse syntax and impose indent + # indent_special = 1: no indentation + # indent_special = 2: use indent from previous line + # indent_special = 3: take indent from input file (leave as is) + indent_special = 0 + + if impose_indent: + indenter = F90Indenter(scope_parser, first_indent, indent_size, orig_filename) + else: + indent_special = 3 + + impose_case = not all(v == 0 for v in case_dict.values()) + + nfl = 0 # fortran line counter + use_same_line = False + stream = InputStream(infile, not indent_fypp, orig_filename=orig_filename) + skip_blank = False + in_format_off_block = False + + while 1: + f_line, comments, lines = stream.next_fortran_line() + + if not lines: + break + + nfl += 1 + orig_lines = lines + + f_line, lines, is_omp_conditional = preprocess_omp(f_line, lines) + f_line, lines, label = preprocess_labels(f_line, lines) + + if indent_special != 3: + indent = [0] * len(lines) + else: + indent = [len(l) - len((l.lstrip(" ")).lstrip("&")) for l in lines] + + comment_lines = format_comments(lines, comments, strip_comments) + + auto_align, auto_format, in_format_off_block = parse_fprettify_directives( + lines, comment_lines, in_format_off_block, orig_filename, stream.line_nr + ) + + lines, do_format, prev_indent, is_blank, is_special = preprocess_line( + f_line, lines, comments, orig_filename, stream.line_nr, indent_fypp + ) + + if is_special[0]: + indent_special = 3 + + if prev_indent and indent_special == 0: + indent_special = 2 + + if is_blank and skip_blank: + continue + if not do_format: + if indent_special == 2: + # inherit indent from previous line + indent[:] = [indenter.get_fline_indent()] * len(indent) + elif indent_special == 0: + indent_special = 1 + else: + + if not auto_align: + manual_lines_indent = get_manual_alignment(lines) + else: + manual_lines_indent = [] + + lines, pre_ampersand, ampersand_sep = remove_pre_ampersands( + lines, is_special, orig_filename, stream.line_nr + ) + + linebreak_pos = get_linebreak_pos(lines, filter_fypp=not indent_fypp) + + f_line = f_line.strip(" ") + + if impose_replacements: + f_line = replace_relational_single_fline(f_line, cstyle) + + if impose_case: + f_line = replace_keywords_single_fline(f_line, case_dict) + + if impose_whitespace: + lines = format_single_fline( + f_line, + whitespace, + whitespace_dict, + linebreak_pos, + ampersand_sep, + scope_parser, + format_decl, + orig_filename, + stream.line_nr, + auto_format, + ) + + lines = append_comments(lines, comment_lines, is_special) + + # target indent for next line + rel_indent = req_indents[nfl] if nfl < len(req_indents) else 0 + + if indent_special != 3: + indenter.process_lines_of_fline( + f_line, + lines, + rel_indent, + indent_size, + stream.line_nr, + indent_fypp, + manual_lines_indent, + ) + indent = indenter.get_lines_indent() + + lines, indent = prepend_ampersands(lines, indent, pre_ampersand) + + if any(is_special): + for pos, line in enumerate(lines): + if is_special[pos]: + indent[pos] = len(line) - len(line.lstrip(" ")) + lines[pos] = line.lstrip(" ") + + lines = remove_trailing_whitespace(lines) + + # need to shift indents if label wider than first indent + if label and impose_indent: + if indent[0] < len(label): + indent = [ind + len(label) - indent[0] for ind in indent] + + write_formatted_line( + outfile, + indent, + lines, + orig_lines, + indent_special, + llength, + use_same_line, + is_omp_conditional, + label, + orig_filename, + stream.line_nr, + ) + + do_indent, use_same_line = pass_defaults_to_next_line(f_line) + + if impose_indent: + if do_indent: + indent_special = 0 + else: + indent_special = 1 + + # rm subsequent blank lines + skip_blank = ( + EMPTY_RE.search(f_line) + and not any(comments) + and not is_omp_conditional + and not label + ) + + +def format_comments(lines, comments, strip_comments): + comments_ftd = [] + for line, comment in zip(lines, comments): + has_comment = bool(comment.strip()) + if has_comment: + if strip_comments: + sep = not comment.strip() == line.strip() + else: + line_minus_comment = line.replace(comment, "") + sep = len(line_minus_comment.rstrip("\n")) - len( + line_minus_comment.rstrip() + ) + else: + sep = 0 + + if line.strip(): # empty lines between linebreaks are ignored + comments_ftd.append(" " * sep + comment.strip()) + return comments_ftd + + +def pass_defaults_to_next_line(f_line): + """defaults to be transferred from f_line to next line""" + if re.search(r";\s*$", f_line, RE_FLAGS): + # if line ended with semicolon, don't indent next line + do_indent = False + use_same_line = True + else: + do_indent = True + use_same_line = False + + return [do_indent, use_same_line] + + +def remove_trailing_whitespace(lines): + """remove trailing whitespaces from lines""" + lines = [re.sub(r"\s+$", "\n", l, RE_FLAGS) for l in lines] + return lines + + +def prepend_ampersands(lines, indent, pre_ampersand): + """prepend ampersands and correct indent""" + for pos, line in enumerate(lines): + amp_insert = pre_ampersand[pos] + if amp_insert: + indent[pos] += -1 + lines[pos] = amp_insert + line.lstrip() + + return [lines, indent] + + +def append_comments(lines, comment_lines, is_special): + """append comments to lines""" + for pos, (line, comment) in enumerate(zip(lines, comment_lines)): + if pos < len(lines) - 1: + has_nl = True # has next line + if not line.strip() and not is_special[pos]: + comment = comment.lstrip() + else: + has_nl = not re.search(EOL_SC, line) + lines[pos] = lines[pos].rstrip(" ") + comment + "\n" * has_nl + + return lines + + +def get_linebreak_pos(lines, filter_fypp=True): + """extract linebreak positions in Fortran line from lines""" + linebreak_pos = [] + if filter_fypp: + notfortran_re = NOTFORTRAN_LINE_RE + else: + notfortran_re = NOTFORTRAN_FYPP_LINE_RE + + for line in lines: + found = None + for char_pos, _ in CharFilter(line, filter_strings=False): + if re.match(LINEBREAK_STR, line[char_pos:], RE_FLAGS): + found = char_pos + if found: + linebreak_pos.append(found) + elif notfortran_re.search(line.lstrip(" ")): + linebreak_pos.append(0) + + linebreak_pos = [ + sum(linebreak_pos[0 : _ + 1]) - 1 for _ in range(0, len(linebreak_pos)) + ] + + return linebreak_pos + + +def remove_pre_ampersands(lines, is_special, filename, line_nr): + """ + remove and return preceding ampersands ('pre_ampersand'). Also return + number of whitespace characters before ampersand of previous line + ('ampersand_sep'). + + Note: Don't do any whitespace formatting on ampersands if next line starts + with an ampersand but remember the original number of spaces + (ampersand_sep). This "special rule" is necessary since ampersands starting + a line can be used to break literals, so changing the number of whitespaces + before the ampersand ending the previous line may lead to invalid syntax or + may change the number of whitespace characters in a string. + """ + pre_ampersand = [] + ampersand_sep = [] + + for pos, line in enumerate(lines): + match = re.search(SOL_STR + r"(&\s*)", line) + if match: + pre_ampersand.append(match.group(1)) + # amount of whitespace before ampersand of previous line: + m = re.search(r"(\s*)&[\s]*(?:!.*)?$", lines[pos - 1]) + if not m: + raise FprettifyParseException( + "Bad continuation line format", filename, line_nr + ) + sep = len(m.group(1)) + + ampersand_sep.append(sep) + else: + pre_ampersand.append("") + if pos > 0: + # use default 1 whitespace character before ampersand + ampersand_sep.append(1) + + lines = [l.strip(" ").strip("&") if not s else l for l, s in zip(lines, is_special)] + return [lines, pre_ampersand, ampersand_sep] + + +def get_manual_alignment(lines): + """extract manual indents for line continuations from line""" + manual_lines_indent = [len(l) - len(l.lstrip(" ").lstrip("&")) for l in lines] + manual_lines_indent = [ind - manual_lines_indent[0] for ind in manual_lines_indent] + return manual_lines_indent + + +def write_formatted_line( + outfile, + indent, + lines, + orig_lines, + indent_special, + llength, + use_same_line, + is_omp_conditional, + label, + filename, + line_nr, +): + """Write reformatted line to file""" + + for ind, line, orig_line in zip(indent, lines, orig_lines): + + # get actual line length excluding comment: + line_length = 0 + for line_length, _ in CharFilter(line): + pass + line_length += 1 + + if indent_special != 1: + ind_use = ind + else: + if use_same_line: + ind_use = 1 + else: + ind_use = 0 + + if CPP_RE.search(line.lstrip()): + ind_use = 0 + + if label: + label_use = label + label = "" # no label for continuation lines + else: + label_use = "" + + if ind_use + line_length <= ( + llength + 1 + ): # llength (default 132) plus 1 newline char + outfile.write( + "!$ " * is_omp_conditional + + label_use + + " " + * ( + ind_use + - 3 * is_omp_conditional + - len(label_use) + + len(line) + - len(line.lstrip(" ")) + ) + + line.lstrip(" ") + ) + elif line_length <= (llength + 1): + outfile.write( + "!$ " * is_omp_conditional + + label_use + + " " + * ( + (llength + 1) + - 3 * is_omp_conditional + - len(label_use) + - len(line.lstrip(" ")) + ) + + line.lstrip(" ") + ) + + log_message( + LINESPLIT_MESSAGE + " (limit: " + str(llength) + ")", + "warning", + filename, + line_nr, + ) + else: + outfile.write(orig_line) + log_message( + LINESPLIT_MESSAGE + " (limit: " + str(llength) + ")", + "warning", + filename, + line_nr, + ) diff --git a/fprettify/fparse_utils.py b/fprettify/fparse_utils.py deleted file mode 100644 index 68c3f9e..0000000 --- a/fprettify/fparse_utils.py +++ /dev/null @@ -1,305 +0,0 @@ -# -*- coding: utf-8 -*- -############################################################################### -# This file is part of fprettify. -# Copyright (C) 2016-2019 Patrick Seewald, CP2K developers group -# -# fprettify is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# fprettify is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with fprettify. If not, see . -############################################################################### - -"""This is a collection of Fortran parsing utilities.""" - -import re -from collections import deque - -RE_FLAGS = re.IGNORECASE | re.UNICODE - -# FIXME bad ass regex! -VAR_DECL_RE = re.compile( - r"^ *(?Pinteger(?: *\* *[0-9]+)?|logical|character(?: *\* *[0-9]+)?|real(?: *\* *[0-9]+)?|complex(?: *\* *[0-9]+)?|type) *(?P\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))? *(?P(?: *, *[a-zA-Z_0-9]+(?: *\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))?)+)? *(?P::)?(?P[^\n]+)\n?", RE_FLAGS) - -OMP_COND_RE = re.compile(r"^\s*(!\$ )", RE_FLAGS) -OMP_DIR_RE = re.compile(r"^\s*(!\$OMP)", RE_FLAGS) - -# supported preprocessors -FYPP_LINE_STR = r"^(#!|#:|\$:|@:)" -FYPP_WITHOUT_PREPRO_STR = r"^(#!|\$:|@:)" -CPP_STR = r"^#[^!:{}]" -COMMENT_LINE_STR = r"^!" -FYPP_OPEN_STR = r"(#{|\${|@{)" -FYPP_CLOSE_STR = r"(}#|}\$|}@)" -NOTFORTRAN_LINE_RE = re.compile(r"("+FYPP_LINE_STR+r"|"+CPP_STR+r"|"+COMMENT_LINE_STR+r")", RE_FLAGS) -NOTFORTRAN_FYPP_LINE_RE = re.compile(r"("+CPP_STR+r"|"+COMMENT_LINE_STR+r")", RE_FLAGS) -FYPP_LINE_RE = re.compile(FYPP_LINE_STR, RE_FLAGS) -FYPP_WITHOUT_PREPRO_RE = re.compile(FYPP_WITHOUT_PREPRO_STR, RE_FLAGS) -FYPP_OPEN_RE = re.compile(FYPP_OPEN_STR, RE_FLAGS) -FYPP_CLOSE_RE = re.compile(FYPP_CLOSE_STR, RE_FLAGS) - -STR_OPEN_RE = re.compile(r"("+FYPP_OPEN_STR+r"|"+r"'|\"|!)", RE_FLAGS) -CPP_RE = re.compile(CPP_STR, RE_FLAGS) - -class fline_parser(object): - def __init__(self): - pass - def search(self, line): - pass - -class parser_re(fline_parser): - def __init__(self, regex, spec=True): - self._re = regex - self.spec = spec - - def search(self, line): - return self._re.search(line) - - def split(self, line): - return self._re.split(line) - -class FprettifyException(Exception): - """Base class for all custom exceptions""" - - def __init__(self, msg, filename, line_nr): - super(FprettifyException, self).__init__(msg) - self.filename = filename - self.line_nr = line_nr - - -class FprettifyParseException(FprettifyException): - """Exception for unparseable Fortran code (user's fault).""" - - pass - - -class FprettifyInternalException(FprettifyException): - """Exception for potential internal errors (fixme's).""" - - pass - - -class CharFilter(object): - """ - An iterator to wrap the iterator returned by `enumerate(string)` - and ignore comments and characters inside strings - """ - - def __init__(self, string, filter_comments=True, filter_strings=True, - filter_fypp=True): - self._content = string - self._it = enumerate(self._content) - self._instring = '' - self._infypp = False - self._incomment = '' - self._instring = '' - self._filter_comments = filter_comments - self._filter_strings = filter_strings - if filter_fypp: - self._notfortran_re = NOTFORTRAN_LINE_RE - else: - self._notfortran_re = NOTFORTRAN_FYPP_LINE_RE - - - def update(self, string, filter_comments=True, filter_strings=True, - filter_fypp=True): - self._content = string - self._it = enumerate(self._content) - self._filter_comments = filter_comments - self._filter_strings = filter_strings - if filter_fypp: - self._notfortran_re = NOTFORTRAN_LINE_RE - else: - self._notfortran_re = NOTFORTRAN_FYPP_LINE_RE - - def __iter__(self): - return self - - def __next__(self): - - pos, char = next(self._it) - - char2 = self._content[pos:pos+2] - - if not self._instring: - if not self._incomment: - if FYPP_OPEN_RE.search(char2): - self._instring = char2 - self._infypp = True - elif (self._notfortran_re.search(char2)): - self._incomment = char - elif char in ['"', "'"]: - self._instring = char - else: - if self._infypp: - if FYPP_CLOSE_RE.search(char2): - self._instring = '' - self._infypp = False - if self._filter_strings: - self.__next__() - return self.__next__() - - elif char in ['"', "'"]: - if self._instring == char: - self._instring = '' - if self._filter_strings: - return self.__next__() - - if self._filter_comments: - if self._incomment: - raise StopIteration - - if self._filter_strings: - if self._instring: - return self.__next__() - - return (pos, char) - - def filter_all(self): - filtered_str = '' - for pos, char in self: - filtered_str += char - return filtered_str - - def instring(self): - return self._instring - -class InputStream(object): - """Class to read logical Fortran lines from a Fortran file.""" - - def __init__(self, infile, filter_fypp=True, orig_filename=None): - if not orig_filename: - orig_filename = infile.name - self.line_buffer = deque([]) - self.infile = infile - self.line_nr = 0 - self.filename = orig_filename - self.endpos = deque([]) - self.what_omp = deque([]) - if filter_fypp: - self.notfortran_re = NOTFORTRAN_LINE_RE - else: - self.notfortran_re = NOTFORTRAN_FYPP_LINE_RE - - def next_fortran_line(self): - """Reads a group of connected lines (connected with &, separated by newline or semicolon) - returns a touple with the joined line, and a list with the original lines. - Doesn't support multiline character constants! - """ - joined_line = "" - comments = [] - lines = [] - continuation = 0 - fypp_cont = 0 - instring = '' - - string_iter = CharFilter('') - fypp_cont = 0 - while 1: - if not self.line_buffer: - line = self.infile.readline().replace("\t", 8 * " ") - self.line_nr += 1 - # convert OMP-conditional fortran statements into normal fortran statements - # but remember to convert them back - - what_omp = OMP_COND_RE.search(line) - - if what_omp: - what_omp = what_omp.group(1) - else: - what_omp = '' - - if what_omp: - line = line.replace(what_omp, '', 1) - line_start = 0 - - pos = -1 - - # multiline string: prepend line continuation with '&' - if string_iter.instring() and not line.lstrip().startswith('&'): - line = '&' + line - - # update instead of CharFilter(line) to account for multiline strings - string_iter.update(line) - for pos, char in string_iter: - if char == ';' or pos + 1 == len(line): - self.endpos.append(pos - line_start) - self.line_buffer.append(line[line_start:pos + 1]) - self.what_omp.append(what_omp) - what_omp = '' - line_start = pos + 1 - - if pos + 1 < len(line): - if fypp_cont: - self.endpos.append(-1) - self.line_buffer.append(line) - self.what_omp.append(what_omp) - else: - for pos_add, char in CharFilter(line[pos+1:], filter_comments=False): - char2 = line[pos+1+pos_add:pos+3+pos_add] - if self.notfortran_re.search(char2): - self.endpos.append(pos + pos_add - line_start) - self.line_buffer.append(line[line_start:]) - self.what_omp.append(what_omp) - break - - if not self.line_buffer: - self.endpos.append(len(line)) - self.line_buffer.append(line) - self.what_omp.append('') - - - line = self.line_buffer.popleft() - endpos = self.endpos.popleft() - what_omp = self.what_omp.popleft() - - if not line: - break - - lines.append(what_omp + line) - - line_core = line[:endpos + 1] - - if self.notfortran_re.search(line[endpos+1:endpos+3]) or fypp_cont: - line_comments = line[endpos + 1:] - else: - line_comments = '' - - if line_core: - newline = (line_core[-1] == '\n') - else: - newline = False - - line_core = line_core.strip() - - if line_core and not NOTFORTRAN_LINE_RE.search(line_core): - continuation = 0 - if line_core.endswith('&'): - continuation = 1 - - if line_comments: - if (FYPP_LINE_RE.search(line[endpos+1:endpos+3]) or fypp_cont) and line_comments.strip()[-1] == '&': - fypp_cont = 1 - else: - fypp_cont = 0 - - line_core = line_core.strip('&') - - comments.append(line_comments.rstrip('\n')) - if joined_line.strip(): - joined_line = joined_line.rstrip( - '\n') + line_core + '\n' * newline - else: - joined_line = what_omp + line_core + '\n' * newline - - if not (continuation or fypp_cont): - break - - return (joined_line, comments, lines) diff --git a/fprettify/indenter.py b/fprettify/indenter.py new file mode 100644 index 0000000..bddbb02 --- /dev/null +++ b/fprettify/indenter.py @@ -0,0 +1,217 @@ +from fprettify.aligner import F90Aligner +from fprettify.parser import * +from fprettify.utils import log_message + + +class F90Indenter: + """ + Parses encapsulation of subunits / scopes line by line + and updates the indentation. + """ + + def __init__(self, scope_parser, first_indent, rel_indent, filename): + # scopes / subunits: + self._scope_storage = [] + # indents for all fortran lines: + self._indent_storage = [] + # indents of actual lines of current fortran line + self._line_indents = [] + + self._parser = scope_parser + + self._filename = filename + self._aligner = F90Aligner(filename) + + # no lines have been processed yet: + self._initial = True + + # implicit scopes: we define implicit scopes, as many as match + # first_indent and rel_indent. This allows for, e.g., a properly + # indented "END FUNCTION" without matching "FUNCTION" statement: + if rel_indent > 0: + for n_impl in range( + first_indent % rel_indent, first_indent + 1, rel_indent + ): + self._indent_storage += [n_impl] + + if not self._indent_storage: + self._indent_storage = [0] + + def process_lines_of_fline( + self, + f_line, + lines, + rel_ind, + rel_ind_con, + line_nr, + indent_fypp=True, + manual_lines_indent=None, + ): + """ + Process all lines that belong to a Fortran line `f_line`. + + Impose a relative indent of `rel_ind` for current Fortran line, + and `rel_ind_con` for line continuation. + By default line continuations are auto-aligned by F90Aligner + :param f_line: fortran line + :param lines: actual lines belonging to f_line + :param rel_ind: relative scope indent size for this line + :rel_ind_con: relative continuation indent size for this line + :line_nr: line number + :indent_fypp: whether or not to include fypp preprocessor lines + :manual_lines_indent: don't use F90Aligner but manually impose + indents for continuations + """ + + if self._initial and (PROG_RE.match(f_line) or MOD_RE.match(f_line)): + self._indent_storage[-1] = 0 + + self._line_indents = [0] * len(lines) + br_indent_list = [0] * len(lines) + + # local variables to avoid self hassle: + line_indents = self._line_indents + + scopes = self._scope_storage + indents = self._indent_storage + filename = self._filename + + # check statements that start new scope + is_new = False + valid_new = False + + f_filter = CharFilter(f_line, filter_fypp=not indent_fypp) + f_line_filtered = f_filter.filter_all() + + for new_n, newre in enumerate(self._parser["new"]): + if ( + newre + and newre.search(f_line_filtered) + and not self._parser["end"][new_n].search(f_line_filtered) + ): + what_new = new_n + is_new = True + valid_new = True + scopes.append(what_new) + log_message( + "{}: {}".format(what_new, f_line), "debug", filename, line_nr + ) + + # check statements that continue scope + is_con = False + valid_con = False + for con_n, conre in enumerate(self._parser["continue"]): + if conre and conre.search(f_line_filtered): + what_con = con_n + is_con = True + log_message( + "{}: {}".format(what_con, f_line), "debug", filename, line_nr + ) + if len(scopes) > 0: + what = scopes[-1] + if what == what_con or indent_fypp: + valid_con = True + + # check statements that end scope + is_end = False + valid_end = False + for end_n, endre in enumerate(self._parser["end"]): + if endre and endre.search(f_line_filtered): + what_end = end_n + is_end = True + log_message( + "{}: {}".format(what_end, f_line), "debug", filename, line_nr + ) + if len(scopes) > 0: + what = scopes.pop() + if ( + what == what_end + or not self._parser["end"][what_end].spec + or indent_fypp + ): + valid_end = True + log_message( + "{}: {}".format(what_end, f_line), + "debug", + filename, + line_nr, + ) + else: + valid_end = True + + # fypp preprocessor scopes may be within continuation lines + if indent_fypp and len(lines) > 1 and not FYPP_LINE_RE.search(f_line_filtered): + + for new_n, newre in enumerate(PREPRO_NEW_SCOPE): + for l in lines: + if newre and newre.search(l): + is_new = True + valid_new = True + scopes.append(new_n) + + for end_n, endre in enumerate(PREPRO_END_SCOPE): + for l in lines: + if endre and endre.search(l): + is_end = True + valid_end = True + if len(scopes) > 0: + what = scopes.pop() + + # deal with line breaks + if not manual_lines_indent: + self._aligner.process_lines_of_fline(f_line, lines, rel_ind_con, line_nr) + br_indent_list = self._aligner.get_lines_indent() + else: + br_indent_list = manual_lines_indent + + for pos in range(0, len(lines) - 1): + line_indents[pos + 1] = br_indent_list[pos + 1] + + if is_new and not is_end: + if not valid_new: + log_message( + "invalid scope opening statement", "info", filename, line_nr + ) + + line_indents = [ind + indents[-1] for ind in line_indents] + + indents.append(rel_ind + indents[-1]) + + elif (not is_new) and (is_con or is_end): + valid = valid_con if is_con else valid_end + + if not valid: + line_indents = [ind + indents[-1] for ind in line_indents] + log_message( + "invalid scope closing statement", "info", filename, line_nr + ) + else: + if len(indents) > 1 or self._initial: + line_indents = [ + ind + indents[-2 + self._initial] for ind in line_indents + ] + + if is_end and valid: + if len(indents) > 1: + indents.pop() + else: + indents[-1] = 0 + + else: + line_indents = [ind + indents[-1] for ind in line_indents] + + # we have processed first line: + self._initial = False + + # reassigning self.* to the updated variables + self._line_indents = line_indents + self._scope_storage = scopes + self._indent_storage = indents + + def get_fline_indent(self): + """after processing, retrieve the indentation of the full Fortran line.""" + return self._indent_storage[-1] + + def get_lines_indent(self): + """after processing, retrieve the indents of all line parts.""" + return self._line_indents diff --git a/fprettify/parser.py b/fprettify/parser.py new file mode 100644 index 0000000..6b2c497 --- /dev/null +++ b/fprettify/parser.py @@ -0,0 +1,1140 @@ +# -*- coding: utf-8 -*- +############################################################################### +# This file is part of fprettify. +# Copyright (C) 2016-2019 Patrick Seewald, CP2K developers group +# +# fprettify is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# fprettify is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with fprettify. If not, see . +############################################################################### + +"""This is a collection of Fortran parsing utilities.""" + +import re +from collections import deque + +from fprettify.constants import FORMATTER_ERROR_MESSAGE +from fprettify.exceptions import FprettifyParseException + +# constants +EOL_STR = r"\s*;?\s*$" # end of fortran line +EOL_SC = r"\s*;\s*$" # whether line is ended with semicolon +SOL_STR = r"^\s*" # start of fortran line +LINEBREAK_STR = r"(&)[\s]*(?:!.*)?$" # for parsing linebreaks +DEL_OPEN_STR = r"(\(\/?|\[)" # delimiter open +DEL_CLOSE_STR = r"(\/?\)|\])" # delimiter close +FYPP_LINE_STR = r"^(#!|#:|\$:|@:)" +FYPP_WITHOUT_PREPRO_STR = r"^(#!|\$:|@:)" +CPP_STR = r"^#[^!:{}]" +COMMENT_LINE_STR = r"^!" +FYPP_OPEN_STR = r"(#{|\${|@{)" +FYPP_CLOSE_STR = r"(}#|}\$|}@)" + +# regex flags +RE_FLAGS = re.IGNORECASE | re.UNICODE + +# FIXME bad ass regex! variable declaration +VAR_DECL_RE = re.compile( + r"^ *(?Pinteger(?: *\* *[0-9]+)?|logical|character(?: *\* *[0-9]+)?|real(?: *\* *[0-9]+)?|complex(?: *\* *[0-9]+)?|type) *(?P\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))? *(?P(?: *, *[a-zA-Z_0-9]+(?: *\((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*\))?)+)? *(?P::)?(?P[^\n]+)\n?", + RE_FLAGS, +) + +# omp regex +OMP_COND_RE = re.compile(r"^\s*(!\$ )", RE_FLAGS) +OMP_DIR_RE = re.compile(r"^\s*(!\$OMP)", RE_FLAGS) + +# supported preprocessors +NOTFORTRAN_LINE_RE = re.compile( + r"(" + FYPP_LINE_STR + r"|" + CPP_STR + r"|" + COMMENT_LINE_STR + r")", RE_FLAGS +) +NOTFORTRAN_FYPP_LINE_RE = re.compile( + r"(" + CPP_STR + r"|" + COMMENT_LINE_STR + r")", RE_FLAGS +) +FYPP_LINE_RE = re.compile(FYPP_LINE_STR, RE_FLAGS) +FYPP_WITHOUT_PREPRO_RE = re.compile(FYPP_WITHOUT_PREPRO_STR, RE_FLAGS) +FYPP_OPEN_RE = re.compile(FYPP_OPEN_STR, RE_FLAGS) +FYPP_CLOSE_RE = re.compile(FYPP_CLOSE_STR, RE_FLAGS) +STR_OPEN_RE = re.compile(r"(" + FYPP_OPEN_STR + r"|" + r"'|\"|!)", RE_FLAGS) +CPP_RE = re.compile(CPP_STR, RE_FLAGS) + +# regular expressions for parsing delimiters +DEL_OPEN_RE = re.compile(r"^" + DEL_OPEN_STR, RE_FLAGS) +DEL_CLOSE_RE = re.compile(r"^" + DEL_CLOSE_STR, RE_FLAGS) + +# regular expressions for parsing operators +# Note: +/- in real literals and sign operator is ignored +PLUSMINUS_RE = re.compile(r"(?<=[\w\)\]])\s*(\+|-)\s*", RE_FLAGS) +# Note: ** or // (or any multiples of * or /) are ignored +# we also ignore any * or / before a :: because we may be seeing 'real*8' +MULTDIV_RE = re.compile( + r"(?<=[\w\)\]])\s*((?(?!=)|>=))\s*(?!\))", + RE_FLAGS, +) +LOG_OP_RE = re.compile(r"\s*(\.(?:AND|OR|EQV|NEQV)\.)\s*", RE_FLAGS) +PRINT_RE = re.compile(r"(?:(?<=\bPRINT)|(?<=\bREAD))\s*(\*,?)\s*", RE_FLAGS) + +# empty line regex +EMPTY_RE = re.compile(SOL_STR + r"$", RE_FLAGS) + +# statement label regex +STATEMENT_LABEL_RE = re.compile(r"^\s*(\d+\s)(?!" + EOL_STR + ")", RE_FLAGS) + +# regular expressions for parsing statements that start, continue or end a subunit: +IF_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*IF\s*\(.*\)\s*THEN" + EOL_STR, RE_FLAGS) +ELSE_RE = re.compile(SOL_STR + r"ELSE(\s*IF\s*\(.*\)\s*THEN)?" + EOL_STR, RE_FLAGS) +ENDIF_RE = re.compile(SOL_STR + r"END\s*IF(\s+\w+)?" + EOL_STR, RE_FLAGS) +DO_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*DO(" + EOL_STR + r"|\s+\w)", RE_FLAGS) +ENDDO_RE = re.compile(SOL_STR + r"END\s*DO(\s+\w+)?" + EOL_STR, RE_FLAGS) +SELCASE_RE = re.compile( + SOL_STR + r"SELECT\s*(CASE|RANK|TYPE)\s*\(.*\)" + EOL_STR, RE_FLAGS +) +CASE_RE = re.compile( + SOL_STR + + r"((CASE|RANK|TYPE\s+IS|CLASS\s+IS)\s*(\(.*\)|DEFAULT)|CLASS\s+DEFAULT)" + + EOL_STR, + RE_FLAGS, +) +ENDSEL_RE = re.compile(SOL_STR + r"END\s*SELECT" + EOL_STR, RE_FLAGS) +ASSOCIATE_RE = re.compile(SOL_STR + r"ASSOCIATE\s*\(.*\)" + EOL_STR, RE_FLAGS) +ENDASSOCIATE_RE = re.compile(SOL_STR + r"END\s*ASSOCIATE" + EOL_STR, RE_FLAGS) +BLK_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*BLOCK" + EOL_STR, RE_FLAGS) +ENDBLK_RE = re.compile(SOL_STR + r"END\s*BLOCK(\s+\w+)?" + EOL_STR, RE_FLAGS) +SUBR_RE = re.compile(r"^([^\"']* )?SUBROUTINE\s+\w+\s*(\(.*\))?" + EOL_STR, RE_FLAGS) +ENDSUBR_RE = re.compile(SOL_STR + r"END\s*SUBROUTINE(\s+\w+)?" + EOL_STR, RE_FLAGS) +FCT_RE = re.compile( + r"^([^\"']* )?FUNCTION\s+\w+\s*(\(.*\))?(\s*RESULT\s*\(\w+\))?" + EOL_STR, RE_FLAGS +) +ENDFCT_RE = re.compile(SOL_STR + r"END\s*FUNCTION(\s+\w+)?" + EOL_STR, RE_FLAGS) +MOD_RE = re.compile(SOL_STR + r"MODULE\s+\w+" + EOL_STR, RE_FLAGS) +ENDMOD_RE = re.compile(SOL_STR + r"END\s*MODULE(\s+\w+)?" + EOL_STR, RE_FLAGS) +SMOD_RE = re.compile(SOL_STR + r"SUBMODULE\s*\(\w+\)\s+\w+" + EOL_STR, RE_FLAGS) +ENDSMOD_RE = re.compile(SOL_STR + r"END\s*SUBMODULE(\s+\w+)?" + EOL_STR, RE_FLAGS) +TYPE_RE = re.compile( + SOL_STR + + r"TYPE(\s*,\s*(BIND\s*\(\s*C\s*\)|EXTENDS\s*\(.*\)|ABSTRACT|PUBLIC|PRIVATE))*(\s*,\s*)?(\s*::\s*|\s+)\w+" + + EOL_STR, + RE_FLAGS, +) +ENDTYPE_RE = re.compile(SOL_STR + r"END\s*TYPE(\s+\w+)?" + EOL_STR, RE_FLAGS) +PROG_RE = re.compile(SOL_STR + r"PROGRAM\s+\w+" + EOL_STR, RE_FLAGS) +ENDPROG_RE = re.compile(SOL_STR + r"END\s*PROGRAM(\s+\w+)?" + EOL_STR, RE_FLAGS) +INTERFACE_RE = re.compile( + r"^([^\"']* )?INTERFACE(\s+\w+|\s+(OPERATOR|ASSIGNMENT)\s*\(.*\))?" + EOL_STR, + RE_FLAGS, +) +ENDINTERFACE_RE = re.compile( + SOL_STR + r"END\s*INTERFACE(\s+\w+|\s+(OPERATOR|ASSIGNMENT)\s*\(.*\))?" + EOL_STR, + RE_FLAGS, +) +CONTAINS_RE = re.compile(SOL_STR + r"CONTAINS" + EOL_STR, RE_FLAGS) +ENUM_RE = re.compile( + SOL_STR + r"ENUM(\s*,\s*(BIND\s*\(\s*C\s*\)))?((\s*::\s*|\s+)\w+)?" + EOL_STR, + RE_FLAGS, +) +ENDENUM_RE = re.compile(SOL_STR + r"END\s*ENUM(\s+\w+)?" + EOL_STR, RE_FLAGS) +ENDANY_RE = re.compile(SOL_STR + r"END" + EOL_STR, RE_FLAGS) + +# Regular expressions for where and forall block constructs +FORALL_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*FORALL\s*\(.*\)" + EOL_STR, RE_FLAGS) +ENDFORALL_RE = re.compile(SOL_STR + r"END\s*FORALL(\s+\w+)?" + EOL_STR, RE_FLAGS) +WHERE_RE = re.compile(SOL_STR + r"(\w+\s*:)?\s*WHERE\s*\(.*\)" + EOL_STR, RE_FLAGS) +ELSEWHERE_RE = re.compile( + SOL_STR + r"ELSE\s*WHERE(\(.*\))?(\s*\w+)?" + EOL_STR, RE_FLAGS +) +ENDWHERE_RE = re.compile(SOL_STR + r"END\s*WHERE(\s+\w+)?" + EOL_STR, RE_FLAGS) + +# Regular expressions for preprocessor directives +FYPP_DEF_RE = re.compile(SOL_STR + r"#:DEF\s+", RE_FLAGS) +FYPP_ENDDEF_RE = re.compile(SOL_STR + r"#:ENDDEF", RE_FLAGS) +FYPP_IF_RE = re.compile(SOL_STR + r"#:IF\s+", RE_FLAGS) +FYPP_ELIF_ELSE_RE = re.compile(SOL_STR + r"#:(ELIF\s+|ELSE)", RE_FLAGS) +FYPP_ENDIF_RE = re.compile(SOL_STR + r"#:ENDIF", RE_FLAGS) +FYPP_FOR_RE = re.compile(SOL_STR + r"#:FOR\s+", RE_FLAGS) +FYPP_ENDFOR_RE = re.compile(SOL_STR + r"#:ENDFOR", RE_FLAGS) +FYPP_BLOCK_RE = re.compile(SOL_STR + r"#:BLOCK\s+", RE_FLAGS) +FYPP_ENDBLOCK_RE = re.compile(SOL_STR + r"#:ENDBLOCK", RE_FLAGS) +FYPP_CALL_RE = re.compile(SOL_STR + r"#:CALL\s+", RE_FLAGS) +FYPP_ENDCALL_RE = re.compile(SOL_STR + r"#:ENDCALL", RE_FLAGS) +FYPP_MUTE_RE = re.compile(SOL_STR + r"#:MUTE", RE_FLAGS) +FYPP_ENDMUTE_RE = re.compile(SOL_STR + r"#:ENDMUTE", RE_FLAGS) +PRIVATE_RE = re.compile(SOL_STR + r"PRIVATE\s*::", RE_FLAGS) +PUBLIC_RE = re.compile(SOL_STR + r"PUBLIC\s*::", RE_FLAGS) +END_RE = re.compile( + SOL_STR + + r"(END)\s*(IF|DO|SELECT|ASSOCIATE|BLOCK|SUBROUTINE|FUNCTION|MODULE|SUBMODULE|TYPE|PROGRAM|INTERFACE|ENUM|WHERE|FORALL)", + RE_FLAGS, +) + +# markups to deactivate formatter +NO_ALIGN_RE = re.compile(SOL_STR + r"&\s*[^\s*]+") + +# match namelist names +NML_RE = re.compile(r"(/\w+/)", RE_FLAGS) + +# find namelists and data statements +NML_STMT_RE = re.compile(SOL_STR + r"NAMELIST.*/.*/", RE_FLAGS) +DATA_STMT_RE = re.compile(SOL_STR + r"DATA\s+\w", RE_FLAGS) + +## Regexp for f90 keywords' +F90_KEYWORDS_RE = re.compile( + r"\b(" + + "|".join( + ( + "allocatable", + "allocate", + "assign", + "assignment", + "backspace", + "block", + "call", + "case", + "character", + "close", + "common", + "complex", + "contains", + "continue", + "cycle", + "data", + "deallocate", + "dimension", + "do", + "double", + "else", + "elseif", + "elsewhere", + "end", + "enddo", + "endfile", + "endif", + "entry", + "equivalence", + "exit", + "external", + "forall", + "format", + "function", + "goto", + "if", + "implicit", + "include", + "inquire", + "integer", + "intent", + "interface", + "intrinsic", + "logical", + "module", + "namelist", + "none", + "nullify", + "only", + "open", + "operator", + "optional", + "parameter", + "pause", + "pointer", + "precision", + "print", + "private", + "procedure", + "program", + "public", + "read", + "real", + "recursive", + "result", + "return", + "rewind", + "save", + "select", + "sequence", + "stop", + "subroutine", + "target", + "then", + "type", + "use", + "where", + "while", + "write", + ## F95 keywords. + "elemental", + "pure", + ## F2003 + "abstract", + "associate", + "asynchronous", + "bind", + "class", + "deferred", + "enum", + "enumerator", + "extends", + "extends_type_of", + "final", + "generic", + "import", + "non_intrinsic", + "non_overridable", + "nopass", + "pass", + "protected", + "same_type_as", + "value", + "volatile", + ## F2008. + "contiguous", + "submodule", + "concurrent", + "codimension", + "sync all", + "sync memory", + "critical", + "image_index", + ) + ) + + r")\b", + RE_FLAGS, +) + +## Regexp whose first part matches F90 intrinsic procedures. +## Add a parenthesis to avoid catching non-procedures. +F90_PROCEDURES_RE = re.compile( + r"\b(" + + "|".join( + ( + "abs", + "achar", + "acos", + "adjustl", + "adjustr", + "aimag", + "aint", + "all", + "allocated", + "anint", + "any", + "asin", + "associated", + "atan", + "atan2", + "bit_size", + "btest", + "ceiling", + "char", + "cmplx", + "conjg", + "cos", + "cosh", + "count", + "cshift", + "date_and_time", + "dble", + "digits", + "dim", + "dot_product", + "dprod", + "eoshift", + "epsilon", + "exp", + "exponent", + "floor", + "fraction", + "huge", + "iachar", + "iand", + "ibclr", + "ibits", + "ibset", + "ichar", + "ieor", + "index", + "int", + "ior", + "ishft", + "ishftc", + "kind", + "lbound", + "len", + "len_trim", + "lge", + "lgt", + "lle", + "llt", + "log", + "log10", + "logical", + "matmul", + "max", + "maxexponent", + "maxloc", + "maxval", + "merge", + "min", + "minexponent", + "minloc", + "minval", + "mod", + "modulo", + "mvbits", + "nearest", + "nint", + "not", + "pack", + "precision", + "present", + "product", + "radix", + ## Real is taken out here to avoid highlighting declarations. + "random_number", + "random_seed", + "range", ## "real" + "repeat", + "reshape", + "rrspacing", + "scale", + "scan", + "selected_int_kind", + "selected_real_kind", + "set_exponent", + "shape", + "sign", + "sin", + "sinh", + "size", + "spacing", + "spread", + "sqrt", + "sum", + "system_clock", + "tan", + "tanh", + "tiny", + "transfer", + "transpose", + "trim", + "ubound", + "unpack", + "verify", + ## F95 intrinsic functions. + "null", + "cpu_time", + ## F2003. + "move_alloc", + "command_argument_count", + "get_command", + "get_command_argument", + "get_environment_variable", + "selected_char_kind", + "wait", + "flush", + "new_line", + "extends", + "extends_type_of", + "same_type_as", + "bind", + ## F2003 ieee_arithmetic intrinsic module. + "ieee_support_underflow_control", + "ieee_get_underflow_mode", + "ieee_set_underflow_mode", + ## F2003 iso_c_binding intrinsic module. + "c_loc", + "c_funloc", + "c_associated", + "c_f_pointer", + "c_f_procpointer", + ## F2008. + "bge", + "bgt", + "ble", + "blt", + "dshiftl", + "dshiftr", + "leadz", + "popcnt", + "poppar", + "trailz", + "maskl", + "maskr", + "shifta", + "shiftl", + "shiftr", + "merge_bits", + "iall", + "iany", + "iparity", + "storage_size", + "bessel_j0", + "bessel_j1", + "bessel_jn", + "bessel_y0", + "bessel_y1", + "bessel_yn", + "erf", + "erfc", + "erfc_scaled", + "gamma", + "hypot", + "log_gamma", + "norm2", + "parity", + "findloc", + "is_contiguous", + "sync images", + "lock", + "unlock", + "image_index", + "lcobound", + "ucobound", + "num_images", + "this_image", + ## F2008 iso_fortran_env module. + "compiler_options", + "compiler_version", + ## F2008 iso_c_binding module. + "c_sizeof", + ) + ) + + r")\b", + RE_FLAGS, +) + +F90_MODULES_RE = re.compile( + r"\b(" + + "|".join( + ( + ## F2003/F2008 module names + "iso_fortran_env", + "iso_c_binding", + "ieee_exceptions", + "ieee_arithmetic", + "ieee_features", + ) + ) + + r")\b", + RE_FLAGS, +) + +## Regexp matching intrinsic operators +F90_OPERATORS_RE = re.compile( + r"(" + + "|".join( + [ + r"\." + a + r"\." + for a in ( + "and", + "eq", + "eqv", + "false", + "ge", + "gt", + "le", + "lt", + "ne", + "neqv", + "not", + "or", + "true", + ) + ] + ) + + r")", + RE_FLAGS, +) + +## Regexp for Fortran intrinsic constants +F90_CONSTANTS_RE = re.compile( + r"\b(" + + "|".join( + ( + ## F2003 iso_fortran_env constants. + "input_unit", + "output_unit", + "error_unit", + "iostat_end", + "iostat_eor", + "numeric_storage_size", + "character_storage_size", + "file_storage_size", + ## F2003 iso_c_binding constants. + "c_int", + "c_short", + "c_long", + "c_long_long", + "c_signed_char", + "c_size_t", + "c_int8_t", + "c_int16_t", + "c_int32_t", + "c_int64_t", + "c_int_least8_t", + "c_int_least16_t", + "c_int_least32_t", + "c_int_least64_t", + "c_int_fast8_t", + "c_int_fast16_t", + "c_int_fast32_t", + "c_int_fast64_t", + "c_intmax_t", + "c_intptr_t", + "c_float", + "c_double", + "c_long_double", + "c_float_complex", + "c_double_complex", + "c_long_double_complex", + "c_bool", + "c_char", + "c_null_char", + "c_alert", + "c_backspace", + "c_form_feed", + "c_new_line", + "c_carriage_return", + "c_horizontal_tab", + "c_vertical_tab", + "c_ptr", + "c_funptr", + "c_null_ptr", + "c_null_funptr", + ## F2008 iso_fortran_env constants. + "character_kinds", + "int8", + "int16", + "int32", + "int64", + "integer_kinds", + "iostat_inquire_internal_unit", + "logical_kinds", + "real_kinds", + "real32", + "real64", + "real128", + "lock_type", + "atomic_int_kind", + "atomic_logical_kind", + ) + ) + + r")\b", + RE_FLAGS, +) + +F90_INT_RE = r"[-+]?[0-9]+" +F90_FLOAT_RE = r"[-+]?([0-9]+\.[0-9]*|\.[0-9]+)" +F90_NUMBER_RE = "(" + F90_INT_RE + "|" + F90_FLOAT_RE + ")" +F90_FLOAT_EXP_RE = F90_NUMBER_RE + r"[eEdD]" + F90_NUMBER_RE +F90_NUMBER_ALL_RE = "(" + F90_NUMBER_RE + "|" + F90_FLOAT_EXP_RE + ")" +F90_NUMBER_ALL_REC = re.compile(F90_NUMBER_ALL_RE, RE_FLAGS) + +## F90_CONSTANTS_TYPES_RE = re.compile(r"\b" + F90_NUMBER_ALL_RE + "_(" + "|".join([a + r"\b" for a in ( +F90_CONSTANTS_TYPES_RE = re.compile( + r"(" + + F90_NUMBER_ALL_RE + + ")*_(" + + "|".join( + ( + ## F2003 iso_fortran_env constants. + ## F2003 iso_c_binding constants. + "c_int", + "c_short", + "c_long", + "c_long_long", + "c_signed_char", + "c_size_t", + "c_int8_t", + "c_int16_t", + "c_int32_t", + "c_int64_t", + "c_int_least8_t", + "c_int_least16_t", + "c_int_least32_t", + "c_int_least64_t", + "c_int_fast8_t", + "c_int_fast16_t", + "c_int_fast32_t", + "c_int_fast64_t", + "c_intmax_t", + "c_intptr_t", + "c_float", + "c_double", + "c_long_double", + "c_float_complex", + "c_double_complex", + "c_long_double_complex", + "c_bool", + "c_char", + ## F2008 iso_fortran_env constants. + "character_kinds", + "int8", + "int16", + "int32", + "int64", + "integer_kinds", + "logical_kinds", + "real_kinds", + "real32", + "real64", + "real128", + "lock_type", + "atomic_int_kind", + "atomic_logical_kind", + ) + ) + + r")\b", + RE_FLAGS, +) + + +def get_curr_delim(line, pos): + """get delimiter token in line starting at pos, if it exists""" + what_del_open = DEL_OPEN_RE.search(line[pos : pos + 2]) + what_del_close = DEL_CLOSE_RE.search(line[pos : pos + 2]) + return [what_del_open, what_del_close] + + +class fline_parser: + def __init__(self): + pass + + def search(self, line): + pass + + +class parser_re(fline_parser): + def __init__(self, regex, spec=True): + self._re = regex + self.spec = spec + + def search(self, line): + return self._re.search(line) + + def split(self, line): + return self._re.split(line) + + +class plusminus_parser(parser_re): + """parser for +/- in addition""" + + def __init__(self, regex): + self._re = regex + self._re_excl = re.compile(r"\b(\d+\.?\d*|\d*\.?\d+)[de]" + EOL_STR, RE_FLAGS) + + def split(self, line): + partsplit = self._re.split(line) + partsplit_out = [] + + # exclude splits due to '+/-' in real literals + for n, part in enumerate(partsplit): + if re.search(r"^(\+|-)$", part): + if self._re_excl.search(partsplit[n - 1]): + if n == 1: + partsplit_out = [partsplit[n - 1]] + if n + 1 >= len(partsplit) or not partsplit_out: + raise FprettifyParseException( + "non-standard expression involving + or -", "", 0 + ) + partsplit_out[-1] += part + partsplit[n + 1] + else: + if n == 1: + partsplit_out = [partsplit[n - 1]] + if n + 1 >= len(partsplit): + raise FprettifyParseException( + "non-standard expression involving + or -", "", 0 + ) + partsplit_out += [part, partsplit[n + 1]] + + if not partsplit_out: + partsplit_out = partsplit + + return partsplit_out + + +class where_parser(parser_re): + """parser for where / forall construct""" + + def search(self, line): + match = self._re.search(line) + + if match: + level = 0 + for pos, char in CharFilter(line): + [what_del_open, what_del_close] = get_curr_delim(line, pos) + + if what_del_open: + if what_del_open.group() == r"(": + level += 1 + + if what_del_close and what_del_close.group() == r")": + if level == 1: + if EMPTY_RE.search(line[pos + 1 :]): + return True + else: + return False + else: + level += -1 + + return False + + +def build_scope_parser(fypp=True, mod=True): + parser = {} + parser["new"] = [ + parser_re(IF_RE), + parser_re(DO_RE), + parser_re(SELCASE_RE), + parser_re(SUBR_RE), + parser_re(FCT_RE), + parser_re(INTERFACE_RE), + parser_re(TYPE_RE), + parser_re(ENUM_RE), + parser_re(ASSOCIATE_RE), + None, + parser_re(BLK_RE), + where_parser(WHERE_RE), + where_parser(FORALL_RE), + ] + + parser["continue"] = [ + parser_re(ELSE_RE), + None, + parser_re(CASE_RE), + parser_re(CONTAINS_RE), + parser_re(CONTAINS_RE), + None, + parser_re(CONTAINS_RE), + None, + None, + None, + None, + parser_re(ELSEWHERE_RE), + None, + ] + + parser["end"] = [ + parser_re(ENDIF_RE), + parser_re(ENDDO_RE), + parser_re(ENDSEL_RE), + parser_re(ENDSUBR_RE), + parser_re(ENDFCT_RE), + parser_re(ENDINTERFACE_RE), + parser_re(ENDTYPE_RE), + parser_re(ENDENUM_RE), + parser_re(ENDASSOCIATE_RE), + parser_re(ENDANY_RE, spec=False), + parser_re(ENDBLK_RE), + parser_re(ENDWHERE_RE), + parser_re(ENDFORALL_RE), + ] + + if mod: + parser["new"].extend( + [parser_re(MOD_RE), parser_re(SMOD_RE), parser_re(PROG_RE)] + ) + parser["continue"].extend( + [parser_re(CONTAINS_RE), parser_re(CONTAINS_RE), parser_re(CONTAINS_RE)] + ) + parser["end"].extend( + [parser_re(ENDMOD_RE), parser_re(ENDSMOD_RE), parser_re(ENDPROG_RE)] + ) + + if fypp: + parser["new"].extend(PREPRO_NEW_SCOPE) + parser["continue"].extend(PREPRO_CONTINUE_SCOPE) + parser["end"].extend(PREPRO_END_SCOPE) + + return parser + + +PREPRO_NEW_SCOPE = [ + parser_re(FYPP_DEF_RE), + parser_re(FYPP_IF_RE), + parser_re(FYPP_FOR_RE), + parser_re(FYPP_BLOCK_RE), + parser_re(FYPP_CALL_RE), + parser_re(FYPP_MUTE_RE), +] +PREPRO_CONTINUE_SCOPE = [None, parser_re(FYPP_ELIF_ELSE_RE), None, None, None, None] +PREPRO_END_SCOPE = [ + parser_re(FYPP_ENDDEF_RE), + parser_re(FYPP_ENDIF_RE), + parser_re(FYPP_ENDFOR_RE), + parser_re(FYPP_ENDBLOCK_RE), + parser_re(FYPP_ENDCALL_RE), + parser_re(FYPP_ENDMUTE_RE), +] + +# two-sided operators +LR_OPS_RE = [REL_OP_RE, LOG_OP_RE, plusminus_parser(PLUSMINUS_RE), MULTDIV_RE, PRINT_RE] + +USE_RE = re.compile( + SOL_STR + "USE(\s+|(,.+?)?::\s*)\w+?((,.+?=>.+?)+|,\s*only\s*:.+?)?$" + EOL_STR, + RE_FLAGS, +) + + +class CharFilter: + """ + An iterator to wrap the iterator returned by `enumerate(string)` + and ignore comments and characters inside strings + """ + + def __init__( + self, string, filter_comments=True, filter_strings=True, filter_fypp=True + ): + self._content = string + self._it = enumerate(self._content) + self._instring = "" + self._infypp = False + self._incomment = "" + self._instring = "" + self._filter_comments = filter_comments + self._filter_strings = filter_strings + if filter_fypp: + self._notfortran_re = NOTFORTRAN_LINE_RE + else: + self._notfortran_re = NOTFORTRAN_FYPP_LINE_RE + + def update( + self, string, filter_comments=True, filter_strings=True, filter_fypp=True + ): + self._content = string + self._it = enumerate(self._content) + self._filter_comments = filter_comments + self._filter_strings = filter_strings + if filter_fypp: + self._notfortran_re = NOTFORTRAN_LINE_RE + else: + self._notfortran_re = NOTFORTRAN_FYPP_LINE_RE + + def __iter__(self): + return self + + def __next__(self): + + pos, char = next(self._it) + + char2 = self._content[pos : pos + 2] + + if not self._instring: + if not self._incomment: + if FYPP_OPEN_RE.search(char2): + self._instring = char2 + self._infypp = True + elif self._notfortran_re.search(char2): + self._incomment = char + elif char in ['"', "'"]: + self._instring = char + else: + if self._infypp: + if FYPP_CLOSE_RE.search(char2): + self._instring = "" + self._infypp = False + if self._filter_strings: + self.__next__() + return self.__next__() + + elif char in ['"', "'"]: + if self._instring == char: + self._instring = "" + if self._filter_strings: + return self.__next__() + + if self._filter_comments: + if self._incomment: + raise StopIteration + + if self._filter_strings: + if self._instring: + return self.__next__() + + return (pos, char) + + def filter_all(self): + filtered_str = "" + for _, char in self: + filtered_str += char + return filtered_str + + def instring(self): + return self._instring + + +class InputStream: + """Class to read logical Fortran lines from a Fortran file.""" + + def __init__(self, infile, filter_fypp=True, orig_filename=None): + if not orig_filename: + orig_filename = infile.name + self.line_buffer = deque([]) + self.infile = infile + self.line_nr = 0 + self.filename = orig_filename + self.endpos = deque([]) + self.what_omp = deque([]) + if filter_fypp: + self.notfortran_re = NOTFORTRAN_LINE_RE + else: + self.notfortran_re = NOTFORTRAN_FYPP_LINE_RE + + def next_fortran_line(self): + """Reads a group of connected lines (connected with &, separated by newline or semicolon) + returns a touple with the joined line, and a list with the original lines. + Doesn't support multiline character constants! + """ + joined_line = "" + comments = [] + lines = [] + continuation = 0 + fypp_cont = 0 + instring = "" + + string_iter = CharFilter("") + fypp_cont = 0 + while 1: + if not self.line_buffer: + line = self.infile.readline().replace("\t", 8 * " ") + self.line_nr += 1 + # convert OMP-conditional fortran statements into normal fortran statements + # but remember to convert them back + + what_omp = OMP_COND_RE.search(line) + + if what_omp: + what_omp = what_omp.group(1) + else: + what_omp = "" + + if what_omp: + line = line.replace(what_omp, "", 1) + line_start = 0 + + pos = -1 + + # multiline string: prepend line continuation with '&' + if string_iter.instring() and not line.lstrip().startswith("&"): + line = "&" + line + + # update instead of CharFilter(line) to account for multiline strings + string_iter.update(line) + for pos, char in string_iter: + if char == ";" or pos + 1 == len(line): + self.endpos.append(pos - line_start) + self.line_buffer.append(line[line_start : pos + 1]) + self.what_omp.append(what_omp) + what_omp = "" + line_start = pos + 1 + + if pos + 1 < len(line): + if fypp_cont: + self.endpos.append(-1) + self.line_buffer.append(line) + self.what_omp.append(what_omp) + else: + for pos_add, char in CharFilter( + line[pos + 1 :], filter_comments=False + ): + char2 = line[pos + 1 + pos_add : pos + 3 + pos_add] + if self.notfortran_re.search(char2): + self.endpos.append(pos + pos_add - line_start) + self.line_buffer.append(line[line_start:]) + self.what_omp.append(what_omp) + break + + if not self.line_buffer: + self.endpos.append(len(line)) + self.line_buffer.append(line) + self.what_omp.append("") + + line = self.line_buffer.popleft() + endpos = self.endpos.popleft() + what_omp = self.what_omp.popleft() + + if not line: + break + + lines.append(what_omp + line) + + line_core = line[: endpos + 1] + + if self.notfortran_re.search(line[endpos + 1 : endpos + 3]) or fypp_cont: + line_comments = line[endpos + 1 :] + else: + line_comments = "" + + if line_core: + newline = line_core[-1] == "\n" + else: + newline = False + + line_core = line_core.strip() + + if line_core and not NOTFORTRAN_LINE_RE.search(line_core): + continuation = 0 + if line_core.endswith("&"): + continuation = 1 + + if line_comments: + if ( + FYPP_LINE_RE.search(line[endpos + 1 : endpos + 3]) or fypp_cont + ) and line_comments.strip()[-1] == "&": + fypp_cont = 1 + else: + fypp_cont = 0 + + line_core = line_core.strip("&") + + comments.append(line_comments.rstrip("\n")) + if joined_line.strip(): + joined_line = joined_line.rstrip("\n") + line_core + "\n" * newline + else: + joined_line = what_omp + line_core + "\n" * newline + + if not (continuation or fypp_cont): + break + + return (joined_line, comments, lines) + + +def parse_fprettify_directives( + lines, comment_lines, in_format_off_block, filename, line_nr +): + """ + parse formatter directives '!&' and line continuations starting with an + ampersand. + """ + auto_align = not any(NO_ALIGN_RE.search(_) for _ in lines) + auto_format = not ( + in_format_off_block or any(_.lstrip().startswith("!&") for _ in comment_lines) + ) + if not auto_format: + auto_align = False + if (len(lines)) == 1: + valid_directive = True + if lines[0].strip().startswith("!&<"): + if in_format_off_block: + valid_directive = False + else: + in_format_off_block = True + if lines[0].strip().startswith("!&>"): + if not in_format_off_block: + valid_directive = False + else: + in_format_off_block = False + if not valid_directive: + raise FprettifyParseException(FORMATTER_ERROR_MESSAGE, filename, line_nr) + + return [auto_align, auto_format, in_format_off_block] diff --git a/fprettify/preprocessor.py b/fprettify/preprocessor.py new file mode 100644 index 0000000..3fd5b34 --- /dev/null +++ b/fprettify/preprocessor.py @@ -0,0 +1,73 @@ +from fprettify.parser import ( + EMPTY_RE, + FYPP_LINE_RE, + OMP_COND_RE, + OMP_DIR_RE, + STATEMENT_LABEL_RE, +) + + +def preprocess_omp(f_line, lines): + """convert omp conditional to normal fortran""" + + is_omp_conditional = bool(OMP_COND_RE.search(f_line)) + if is_omp_conditional: + f_line = OMP_COND_RE.sub(" ", f_line, count=1) + lines = [OMP_COND_RE.sub(" ", l, count=1) for l in lines] + + return [f_line, lines, is_omp_conditional] + + +def preprocess_labels(f_line, lines): + """remove statement labels""" + + match = STATEMENT_LABEL_RE.search(f_line) + if match: + label = match.group(1) + else: + label = "" + + if label: + f_line = STATEMENT_LABEL_RE.sub(len(label) * " ", f_line, count=1) + lines[0] = STATEMENT_LABEL_RE.sub(len(label) * " ", lines[0], count=1) + + return [f_line, lines, label] + + +def preprocess_line(f_line, lines, comments, filename, line_nr, indent_fypp): + """preprocess lines: identification and formatting of special cases""" + is_blank = False + prev_indent = False + do_format = False + + # is_special: special directives that should not be treated as Fortran + # currently supported: fypp preprocessor directives or comments for FORD documentation + is_special = [False] * len(lines) + + for pos, line in enumerate(lines): + line_strip = line.lstrip() + if indent_fypp: + is_special[pos] = line_strip.startswith("!!") or ( + FYPP_LINE_RE.search(line_strip) if pos > 0 else False + ) + else: + is_special[pos] = FYPP_LINE_RE.search(line_strip) or line_strip.startswith( + "!!" + ) + + # if first line is special, all lines should be special + if is_special[0]: + is_special = [True] * len(lines) + + if EMPTY_RE.search(f_line): # empty lines including comment lines + if any(comments): + if lines[0].startswith(" ") and not OMP_DIR_RE.search(lines[0]): + # indent comment lines only if they were not indented before. + prev_indent = True + else: + is_blank = True + lines = [l.strip(" ") if not is_special[n] else l for n, l in enumerate(lines)] + else: + do_format = True + + return [lines, do_format, prev_indent, is_blank, is_special] diff --git a/fprettify/tests/__init__.py b/fprettify/tests/__init__.py index 5980930..5c5017a 100644 --- a/fprettify/tests/__init__.py +++ b/fprettify/tests/__init__.py @@ -19,46 +19,47 @@ ############################################################################### """Dynamically create tests based on examples in examples/before.""" -from __future__ import (absolute_import, division, - print_function, unicode_literals) -import sys -import os -import unittest +import difflib import hashlib -import logging import io +import logging +import os import re -import difflib import subprocess -import inspect +import sys +import unittest -sys.stderr = io.TextIOWrapper( - sys.stderr.detach(), encoding='UTF-8', line_buffering=True) +from fprettify.constants import FORTRAN_EXTENSIONS +from fprettify.exceptions import FprettifyInternalException, FprettifyParseException +from fprettify.formatter import reformat_ffile +from fprettify.utils import log_exception, set_logger -import fprettify -from fprettify.fparse_utils import FprettifyParseException, FprettifyInternalException +sys.stderr = io.TextIOWrapper( + sys.stderr.detach(), encoding="UTF-8", line_buffering=True +) def joinpath(path1, path2): return os.path.normpath(os.path.join(path1, path2)) -MYPATH = os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))) -BEFORE_DIR = joinpath(MYPATH, r'../../fortran_tests/before/') -AFTER_DIR = joinpath(MYPATH, r'../../fortran_tests/after/') -RESULT_DIR = joinpath(MYPATH, r'../../fortran_tests/test_results/') -RESULT_FILE = joinpath(RESULT_DIR, r'expected_results') -FAILED_FILE = joinpath(RESULT_DIR, r'failed_results') +MYPATH = os.path.dirname(__file__) + +BEFORE_DIR = joinpath(MYPATH, r"../../fortran_tests/before/") +AFTER_DIR = joinpath(MYPATH, r"../../fortran_tests/after/") +RESULT_DIR = joinpath(MYPATH, r"../../fortran_tests/test_results/") +RESULT_FILE = joinpath(RESULT_DIR, r"expected_results") +FAILED_FILE = joinpath(RESULT_DIR, r"failed_results") RUNSCRIPT = joinpath(MYPATH, r"../../fprettify.py") -fprettify.set_fprettify_logger(logging.ERROR) +set_logger(logging.ERROR) class AlienInvasion(Exception): """Should not happen""" + pass @@ -69,6 +70,7 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, flush=True, **kwargs) + class FPrettifyTestCase(unittest.TestCase): """ test class to be recognized by unittest. @@ -98,7 +100,7 @@ def setUpClass(cls): eprint("-" * 70) eprint("recognized Fortran files") - eprint(", ".join(fprettify.FORTRAN_EXTENSIONS)) + eprint(", ".join(FORTRAN_EXTENSIONS)) eprint("-" * 70) eprint("Testing with Fortran files in " + BEFORE_DIR) eprint("Writing formatted Fortran files to " + AFTER_DIR) @@ -114,7 +116,7 @@ def tearDownClass(cls): """ if cls.n_parsefail + cls.n_internalfail > 0: format = "{:<20}{:<6}" - eprint('\n' + "=" * 70) + eprint("\n" + "=" * 70) eprint("IGNORED errors: invalid or old Fortran") eprint("-" * 70) eprint(format.format("parse errors: ", cls.n_parsefail)) @@ -122,20 +124,22 @@ def tearDownClass(cls): @staticmethod def write_result(filename, content, sep_str): # pragma: no cover - with io.open(filename, 'a', encoding='utf-8') as outfile: - outfile.write(sep_str.join(content) + '\n') + with io.open(filename, "a", encoding="utf-8") as outfile: + outfile.write(sep_str.join(content) + "\n") def test_whitespace(self): """simple test for whitespace formatting options -w in [0, 1, 2]""" instring = "(/-a-b-(a+b-c)/(-c)*d**e,f[1]%v/)" - outstring_exp = ["(/-a-b-(a+b-c)/(-c)*d**e,f[1]%v/)", - "(/-a-b-(a+b-c)/(-c)*d**e, f[1]%v/)", - "(/-a - b - (a + b - c)/(-c)*d**e, f[1]%v/)", - "(/-a - b - (a + b - c) / (-c) * d**e, f[1]%v/)"] + outstring_exp = [ + "(/-a-b-(a+b-c)/(-c)*d**e,f[1]%v/)", + "(/-a-b-(a+b-c)/(-c)*d**e, f[1]%v/)", + "(/-a - b - (a + b - c)/(-c)*d**e, f[1]%v/)", + "(/-a - b - (a + b - c) / (-c) * d**e, f[1]%v/)", + ] outstring = [] for w, out in zip(range(0, 4), outstring_exp): - args = ['-w', str(w)] + args = ["-w", str(w)] self.assert_fprettify_result(args, instring, out) def test_type_selector(self): @@ -143,7 +147,7 @@ def test_type_selector(self): instring = "A%component=func(mytype%a,mytype%abc+mytype%abcd)" outstring_exp = "A % component = func(mytype % a, mytype % abc + mytype % abcd)" - self.assert_fprettify_result(['-w 4'], instring, outstring_exp) + self.assert_fprettify_result(["-w 4"], instring, outstring_exp) def test_indent(self): """simple test for indent options -i in [0, 3, 4]""" @@ -152,70 +156,105 @@ def test_indent(self): instring = "iF(teSt)ThEn\nCaLl subr(a,b,&\nc,(/d,&\ne,f/))\nEnD iF" outstring_exp = [ - "iF (teSt) ThEn\n" + - " " * ind + "CaLl subr(a, b, &\n" + - " " * (10 + ind) + "c, (/d, &\n" + - " " * (15 + ind) + "e, f/))\nEnD iF" + "iF (teSt) ThEn\n" + + " " * ind + + "CaLl subr(a, b, &\n" + + " " * (10 + ind) + + "c, (/d, &\n" + + " " * (15 + ind) + + "e, f/))\nEnD iF" for ind in indents ] for ind, out in zip(indents, outstring_exp): - args = ['-i', str(ind)] + args = ["-i", str(ind)] self.assert_fprettify_result(args, instring, out) def test_nested(self): """test correct indentation of nested loops""" - instring = ("integer :: i,j\ndo i=1,2\ndo j=1,3\n" - "print*,i,j,i*j\nend do\nend do") - outstring_exp_default = ("integer :: i, j\ndo i = 1, 2\ndo j = 1, 3\n" - " print *, i, j, i*j\nend do\nend do") - outstring_exp_strict = ("integer :: i, j\ndo i = 1, 2\n do j = 1, 3\n" - " print *, i, j, i*j\n end do\nend do") + instring = ( + "integer :: i,j\ndo i=1,2\ndo j=1,3\n" "print*,i,j,i*j\nend do\nend do" + ) + outstring_exp_default = ( + "integer :: i, j\ndo i = 1, 2\ndo j = 1, 3\n" + " print *, i, j, i*j\nend do\nend do" + ) + outstring_exp_strict = ( + "integer :: i, j\ndo i = 1, 2\n do j = 1, 3\n" + " print *, i, j, i*j\n end do\nend do" + ) self.assert_fprettify_result([], instring, outstring_exp_default) - self.assert_fprettify_result(['--strict-indent'], instring, outstring_exp_strict) + self.assert_fprettify_result( + ["--strict-indent"], instring, outstring_exp_strict + ) def test_reset_indent(self): """test of reset indentation at file start""" - instring = ("integer :: i,j\ndo i=1,2\ndo j=1,3\n" - "print*,i,j,i*j\nend do\nend do", - " module a\ninteger :: 1\n") - outstring = ("integer :: i, j\ndo i = 1, 2\ndo j = 1, 3\n" - " print *, i, j, i*j\nend do\nend do", - "module a\n integer :: 1") + instring = ( + "integer :: i,j\ndo i=1,2\ndo j=1,3\n" "print*,i,j,i*j\nend do\nend do", + " module a\ninteger :: 1\n", + ) + outstring = ( + "integer :: i, j\ndo i = 1, 2\ndo j = 1, 3\n" + " print *, i, j, i*j\nend do\nend do", + "module a\n integer :: 1", + ) for ind, out in zip(instring, outstring): - self.assert_fprettify_result([],ind, out) + self.assert_fprettify_result([], ind, out) def test_disable(self): """test disabling indentation and/or whitespace formatting""" - instring = ("if(&\nl==111)&\n then\n do m =1, 2\n A=&\nB+C\n end do; endif") - outstring_exp_default = ("if ( &\n l == 111) &\n then\n do m = 1, 2\n" - " A = &\n B + C\n end do; end if") - outstring_exp_nowhitespace = ("if(&\n l==111)&\n then\n do m =1, 2\n" - " A=&\n B+C\n end do; endif") - outstring_exp_noindent = ("if ( &\nl == 111) &\n then\n do m = 1, 2\n" - " A = &\nB + C\n end do; end if") + instring = ( + "if(&\nl==111)&\n then\n do m =1, 2\n A=&\nB+C\n end do; endif" + ) + outstring_exp_default = ( + "if ( &\n l == 111) &\n then\n do m = 1, 2\n" + " A = &\n B + C\n end do; end if" + ) + outstring_exp_nowhitespace = ( + "if(&\n l==111)&\n then\n do m =1, 2\n" + " A=&\n B+C\n end do; endif" + ) + outstring_exp_noindent = ( + "if ( &\nl == 111) &\n then\n do m = 1, 2\n" + " A = &\nB + C\n end do; end if" + ) self.assert_fprettify_result([], instring, outstring_exp_default) - self.assert_fprettify_result(['--disable-whitespace'], instring, outstring_exp_nowhitespace) - self.assert_fprettify_result(['--disable-indent'], instring, outstring_exp_noindent) - self.assert_fprettify_result(['--disable-indent', '--disable-whitespace'], instring, instring) + self.assert_fprettify_result( + ["--disable-whitespace"], instring, outstring_exp_nowhitespace + ) + self.assert_fprettify_result( + ["--disable-indent"], instring, outstring_exp_noindent + ) + self.assert_fprettify_result( + ["--disable-indent", "--disable-whitespace"], instring, instring + ) def test_comments(self): """test options related to comments""" - instring = ("TYPE mytype\n! c1\n !c2\n INTEGER :: a ! c3\n" - " REAL :: b, & ! c4\n! c5\n ! c6\n" - " d ! c7\nEND TYPE ! c8") - outstring_exp_default = ("TYPE mytype\n! c1\n !c2\n INTEGER :: a ! c3\n" - " REAL :: b, & ! c4\n ! c5\n ! c6\n" - " d ! c7\nEND TYPE ! c8") - outstring_exp_strip = ("TYPE mytype\n! c1\n !c2\n INTEGER :: a ! c3\n" - " REAL :: b, & ! c4\n ! c5\n ! c6\n" - " d ! c7\nEND TYPE ! c8") + instring = ( + "TYPE mytype\n! c1\n !c2\n INTEGER :: a ! c3\n" + " REAL :: b, & ! c4\n! c5\n ! c6\n" + " d ! c7\nEND TYPE ! c8" + ) + outstring_exp_default = ( + "TYPE mytype\n! c1\n !c2\n INTEGER :: a ! c3\n" + " REAL :: b, & ! c4\n ! c5\n ! c6\n" + " d ! c7\nEND TYPE ! c8" + ) + outstring_exp_strip = ( + "TYPE mytype\n! c1\n !c2\n INTEGER :: a ! c3\n" + " REAL :: b, & ! c4\n ! c5\n ! c6\n" + " d ! c7\nEND TYPE ! c8" + ) self.assert_fprettify_result([], instring, outstring_exp_default) - self.assert_fprettify_result(['--strip-comments'], instring, outstring_exp_strip) + self.assert_fprettify_result( + ["--strip-comments"], instring, outstring_exp_strip + ) def test_directive(self): """ @@ -225,16 +264,18 @@ def test_directive(self): # manual alignment instring = "align_me = [ -1, 10,0, &\n & 0,1000 , 0,&\n &0 , -1, 1]" - outstring_exp = "align_me = [-1, 10, 0, &\n & 0, 1000, 0,&\n &0, -1, 1]" + outstring_exp = ( + "align_me = [-1, 10, 0, &\n & 0, 1000, 0,&\n &0, -1, 1]" + ) self.assert_fprettify_result([], instring, outstring_exp) # inline deactivate - instring2 = '\n'.join(_ + ' !&' for _ in instring.splitlines()) + instring2 = "\n".join(_ + " !&" for _ in instring.splitlines()) outstring_exp = instring2 self.assert_fprettify_result([], instring2, outstring_exp) # block deactivate - instring3 = '!&<\n' + instring + '\n!&>' + instring3 = "!&<\n" + instring + "\n!&>" outstring_exp = instring3 self.assert_fprettify_result([], instring3, outstring_exp) @@ -244,10 +285,8 @@ def assert_fprettify_result(self, args, instring, outstring_exp): outstring_exp """ args.insert(0, RUNSCRIPT) - p1 = subprocess.Popen( - args, stdout=subprocess.PIPE, stdin=subprocess.PIPE) - outstring = p1.communicate(instring.encode( - 'UTF-8'))[0].decode('UTF-8').rstrip() + p1 = subprocess.Popen(args, stdout=subprocess.PIPE, stdin=subprocess.PIPE) + outstring = p1.communicate(instring.encode("UTF-8"))[0].decode("UTF-8").rstrip() self.assertEqual(outstring_exp.rstrip(), outstring) def test_io(self): @@ -260,30 +299,33 @@ def test_io(self): alien_file = "alien_invasion.f90" if os.path.isfile(alien_file): - raise AlienInvasion( - "remove file alien_invasion.f90") # pragma: no cover + raise AlienInvasion("remove file alien_invasion.f90") # pragma: no cover try: - with io.open(alien_file, 'w', encoding='utf-8') as infile: + with io.open(alien_file, "w", encoding="utf-8") as infile: infile.write(instring) # testing stdin --> stdout - p1 = subprocess.Popen(RUNSCRIPT, - stdout=subprocess.PIPE, stdin=subprocess.PIPE) - outstring.append(p1.communicate( - instring.encode('UTF-8'))[0].decode('UTF-8')) + p1 = subprocess.Popen( + RUNSCRIPT, stdout=subprocess.PIPE, stdin=subprocess.PIPE + ) + outstring.append( + p1.communicate(instring.encode("UTF-8"))[0].decode("UTF-8") + ) # testing file --> stdout - p1 = subprocess.Popen([RUNSCRIPT, alien_file, '--stdout'], - stdout=subprocess.PIPE) - outstring.append(p1.communicate( - instring.encode('UTF-8')[0])[0].decode('UTF-8')) + p1 = subprocess.Popen( + [RUNSCRIPT, alien_file, "--stdout"], stdout=subprocess.PIPE + ) + outstring.append( + p1.communicate(instring.encode("UTF-8")[0])[0].decode("UTF-8") + ) # testing file --> file (inplace) p1 = subprocess.Popen([RUNSCRIPT, alien_file]) p1.wait() - with io.open(alien_file, 'r', encoding='utf-8') as infile: + with io.open(alien_file, "r", encoding="utf-8") as infile: outstring.append(infile.read()) for outstr in outstring: @@ -297,114 +339,130 @@ def test_io(self): def test_multi_alias(self): """test for issue #11 (multiple alias and alignment)""" - instring="use A,only:B=>C,&\nD=>E" - outstring="use A, only: B => C, &\n D => E" + instring = "use A,only:B=>C,&\nD=>E" + outstring = "use A, only: B => C, &\n D => E" self.assert_fprettify_result([], instring, outstring) def test_use(self): """test for alignment of use statements""" - instring1="use A,only:B,C,&\nD,E" - instring2="use A,only:&\nB,C,D,E" - outstring1="use A, only: B, C, &\n D, E" - outstring2="use A, only: &\n B, C, D, E" + instring1 = "use A,only:B,C,&\nD,E" + instring2 = "use A,only:&\nB,C,D,E" + outstring1 = "use A, only: B, C, &\n D, E" + outstring2 = "use A, only: &\n B, C, D, E" self.assert_fprettify_result([], instring1, outstring1) self.assert_fprettify_result([], instring2, outstring2) def test_wrongkind(self): """test whitespacing of deprecated kind definition""" - instring = ["REAL*8 :: r, f ! some reals", - "REAL * 8 :: r, f ! some reals", - "INTEGER * 4 :: c, i ! some integers", - "INTEGER*4 :: c, i ! some integers"] - outstring = ["REAL*8 :: r, f ! some reals", - "REAL*8 :: r, f ! some reals", - "INTEGER*4 :: c, i ! some integers", - "INTEGER*4 :: c, i ! some integers"] + instring = [ + "REAL*8 :: r, f ! some reals", + "REAL * 8 :: r, f ! some reals", + "INTEGER * 4 :: c, i ! some integers", + "INTEGER*4 :: c, i ! some integers", + ] + outstring = [ + "REAL*8 :: r, f ! some reals", + "REAL*8 :: r, f ! some reals", + "INTEGER*4 :: c, i ! some integers", + "INTEGER*4 :: c, i ! some integers", + ] for i in range(0, len(instring)): self.assert_fprettify_result([], instring[i], outstring[i]) def test_new_intrinsics(self): """test new I/O intrinsics""" - instring = ["REWIND(12)", - "BACKSPACE(13)", - "INQUIRE(14)"] - outstring = ["REWIND (12)", - "BACKSPACE (13)", - "INQUIRE (14)"] + instring = ["REWIND(12)", "BACKSPACE(13)", "INQUIRE(14)"] + outstring = ["REWIND (12)", "BACKSPACE (13)", "INQUIRE (14)"] for i in range(0, len(instring)): self.assert_fprettify_result([], instring[i], outstring[i]) def test_associate(self): """test correct formatting of associate construct""" - instring = ("associate(a=>b , c =>d ,e=> f )\n" - "e=a+c\n" - "end associate") - outstring = ("associate (a => b, c => d, e => f)\n" - " e = a + c\n" - "end associate") + instring = "associate(a=>b , c =>d ,e=> f )\n" "e=a+c\n" "end associate" + outstring = ( + "associate (a => b, c => d, e => f)\n" " e = a + c\n" "end associate" + ) self.assert_fprettify_result([], instring, outstring) def test_line_length(self): """test line length option""" - instring = ["REAL(KIND=4) :: r,f ! some reals", - "if( min == max.and.min .eq. thres )", - "INQUIRE(14)"] + instring = [ + "REAL(KIND=4) :: r,f ! some reals", + "if( min == max.and.min .eq. thres )", + "INQUIRE(14)", + ] instring_ = "if( min == max.and.min .eq. thres ) one_really_long_function_call_to_hit_the_line_limit(parameter1, parameter2,parameter3,parameter4,parameter5,err) ! this line would be too long" - outstring = ["REAL(KIND=4) :: r, f ! some reals", - "REAL(KIND=4) :: r,f ! some reals", - "if (min == max .and. min .eq. thres)", - "if( min == max.and.min .eq. thres )", - "INQUIRE (14)", - "INQUIRE (14)"] - outstring_ = ["if( min == max.and.min .eq. thres ) one_really_long_function_call_to_hit_the_line_limit(parameter1, parameter2,parameter3,parameter4,parameter5,err) ! this line would be too long", - "if (min == max .and. min .eq. thres) one_really_long_function_call_to_hit_the_line_limit(parameter1, parameter2, parameter3, parameter4, parameter5, err) ! this line would be too long"] + outstring = [ + "REAL(KIND=4) :: r, f ! some reals", + "REAL(KIND=4) :: r,f ! some reals", + "if (min == max .and. min .eq. thres)", + "if( min == max.and.min .eq. thres )", + "INQUIRE (14)", + "INQUIRE (14)", + ] + outstring_ = [ + "if( min == max.and.min .eq. thres ) one_really_long_function_call_to_hit_the_line_limit(parameter1, parameter2,parameter3,parameter4,parameter5,err) ! this line would be too long", + "if (min == max .and. min .eq. thres) one_really_long_function_call_to_hit_the_line_limit(parameter1, parameter2, parameter3, parameter4, parameter5, err) ! this line would be too long", + ] # test shorter lines first, after all the actual length doesn't matter for i in range(0, len(instring)): - self.assert_fprettify_result(['-S'], instring[i], outstring[2*i]) - self.assert_fprettify_result(['-S', '-l 20'], instring[i], outstring[2*i + 1]) + self.assert_fprettify_result(["-S"], instring[i], outstring[2 * i]) + self.assert_fprettify_result( + ["-S", "-l 20"], instring[i], outstring[2 * i + 1] + ) # now test a long line - self.assert_fprettify_result(['-S'], instring_, outstring_[0]) - self.assert_fprettify_result(['-S', '-l 0'], instring_, outstring_[1]) + self.assert_fprettify_result(["-S"], instring_, outstring_[0]) + self.assert_fprettify_result(["-S", "-l 0"], instring_, outstring_[1]) def test_relation_replacement(self): """test replacement of relational statements""" - instring = ["if ( min < max .and. min .lt. thres)", - "if (min > max .and. min .gt. thres )", - "if ( min == max .and. min .eq. thres )", - "if(min /= max .and. min .ne. thres)", - "if(min >= max .and. min .ge. thres )", - "if( min <= max .and. min .le. thres)", - "'==== heading", - "if (vtk%my_rank .eq. 0) write (vtk%filehandle_par, '(\"\",", - "if (abc(1) .lt. -bca .or. &\n qwe .gt. ewq) then"] - f_outstring = ["if (min .lt. max .and. min .lt. thres)", - "if (min .gt. max .and. min .gt. thres)", - "if (min .eq. max .and. min .eq. thres)", - "if (min .ne. max .and. min .ne. thres)", - "if (min .ge. max .and. min .ge. thres)", - "if (min .le. max .and. min .le. thres)", - "'==== heading", - "if (vtk%my_rank .eq. 0) write (vtk%filehandle_par, '(\"\",", - "if (abc(1) .lt. -bca .or. &\n qwe .gt. ewq) then"] - c_outstring = ["if (min < max .and. min < thres)", - "if (min > max .and. min > thres)", - "if (min == max .and. min == thres)", - "if (min /= max .and. min /= thres)", - "if (min >= max .and. min >= thres)", - "if (min <= max .and. min <= thres)", - "'==== heading", - "if (vtk%my_rank == 0) write (vtk%filehandle_par, '(\"\",", - "if (abc(1) < -bca .or. &\n qwe > ewq) then"] + instring = [ + "if ( min < max .and. min .lt. thres)", + "if (min > max .and. min .gt. thres )", + "if ( min == max .and. min .eq. thres )", + "if(min /= max .and. min .ne. thres)", + "if(min >= max .and. min .ge. thres )", + "if( min <= max .and. min .le. thres)", + "'==== heading", + "if (vtk%my_rank .eq. 0) write (vtk%filehandle_par, '(\"",', + "if (abc(1) .lt. -bca .or. &\n qwe .gt. ewq) then", + ] + f_outstring = [ + "if (min .lt. max .and. min .lt. thres)", + "if (min .gt. max .and. min .gt. thres)", + "if (min .eq. max .and. min .eq. thres)", + "if (min .ne. max .and. min .ne. thres)", + "if (min .ge. max .and. min .ge. thres)", + "if (min .le. max .and. min .le. thres)", + "'==== heading", + "if (vtk%my_rank .eq. 0) write (vtk%filehandle_par, '(\"",', + "if (abc(1) .lt. -bca .or. &\n qwe .gt. ewq) then", + ] + c_outstring = [ + "if (min < max .and. min < thres)", + "if (min > max .and. min > thres)", + "if (min == max .and. min == thres)", + "if (min /= max .and. min /= thres)", + "if (min >= max .and. min >= thres)", + "if (min <= max .and. min <= thres)", + "'==== heading", + "if (vtk%my_rank == 0) write (vtk%filehandle_par, '(\"",', + "if (abc(1) < -bca .or. &\n qwe > ewq) then", + ] for i in range(0, len(instring)): - self.assert_fprettify_result(['--enable-replacements', '--c-relations'], instring[i], c_outstring[i]) - self.assert_fprettify_result(['--enable-replacements'], instring[i], f_outstring[i]) + self.assert_fprettify_result( + ["--enable-replacements", "--c-relations"], instring[i], c_outstring[i] + ) + self.assert_fprettify_result( + ["--enable-replacements"], instring[i], f_outstring[i] + ) def test_swap_case(self): """test replacement of keyword character case""" @@ -427,8 +485,8 @@ def test_swap_case(self): "USE ISO_FORTRAN_ENV, ONLY: int64", "INTEGER, INTENT(IN) :: r, i, j, k", "IF (l.EQ.2) l=MAX (l64, 2_int64)", - "PURE SUBROUTINE mypure()" - ) + "PURE SUBROUTINE mypure()", + ) outstring = ( "module exAmple", "integer, parameter :: SELECTED_REAL_KIND = 1*2", @@ -448,11 +506,12 @@ def test_swap_case(self): "use iso_fortran_env, only: INT64", "integer, intent(IN) :: r, i, j, k", "if (l .eq. 2) l = max(l64, 2_INT64)", - "pure subroutine mypure()" - ) + "pure subroutine mypure()", + ) for i in range(len(instring)): - self.assert_fprettify_result(['--case', '1', '1', '1', '2'], - instring[i], outstring[i]) + self.assert_fprettify_result( + ["--case", "1", "1", "1", "2"], instring[i], outstring[i] + ) def test_do(self): """test correct parsing of do statement""" @@ -462,45 +521,51 @@ def test_do(self): def test_omp(self): """test formatting of omp directives""" - instring = ("PROGRAM test_omp\n" - " !$OMP PARALLEL DO\n" - "b=4\n" - "!$a=b\n" - "!$ a=b\n" - " !$ c=b\n" - "!$acc parallel loop\n" - "!$OMP END PARALLEL DO\n" - "END PROGRAM") - outstring = ("PROGRAM test_omp\n" - "!$OMP PARALLEL DO\n" - " b = 4\n" - "!$a=b\n" - "!$ a = b\n" - "!$ c = b\n" - "!$acc parallel loop\n" - "!$OMP END PARALLEL DO\n" - "END PROGRAM") + instring = ( + "PROGRAM test_omp\n" + " !$OMP PARALLEL DO\n" + "b=4\n" + "!$a=b\n" + "!$ a=b\n" + " !$ c=b\n" + "!$acc parallel loop\n" + "!$OMP END PARALLEL DO\n" + "END PROGRAM" + ) + outstring = ( + "PROGRAM test_omp\n" + "!$OMP PARALLEL DO\n" + " b = 4\n" + "!$a=b\n" + "!$ a = b\n" + "!$ c = b\n" + "!$acc parallel loop\n" + "!$OMP END PARALLEL DO\n" + "END PROGRAM" + ) self.assert_fprettify_result([], instring, outstring) def test_ford(self): """test formatting of ford comments""" - instring = (" a = b\n" - " !! ford docu\n" - "b=c\n" - " !! ford docu\n" - "subroutine test(a,b,&\n" - " !! ford docu\n" - " c, d, e)" - ) - outstring = (" a = b\n" - " !! ford docu\n" - " b = c\n" - " !! ford docu\n" - " subroutine test(a, b, &\n" - " !! ford docu\n" - " c, d, e)" - ) + instring = ( + " a = b\n" + " !! ford docu\n" + "b=c\n" + " !! ford docu\n" + "subroutine test(a,b,&\n" + " !! ford docu\n" + " c, d, e)" + ) + outstring = ( + " a = b\n" + " !! ford docu\n" + " b = c\n" + " !! ford docu\n" + " subroutine test(a, b, &\n" + " !! ford docu\n" + " c, d, e)" + ) self.assert_fprettify_result([], instring, outstring) @@ -517,7 +582,7 @@ def test_fypp(self): outstring = [] instring += [ -""" + """ #:if DEBUG> 0 print *, "hola" if( .not. (${cond}$) ) then @@ -528,10 +593,10 @@ def test_fypp(self): end if #:endif """ -] + ] outstring += [ -""" + """ #:if DEBUG> 0 print *, "hola" if (.not. (${cond}$)) then @@ -542,10 +607,10 @@ def test_fypp(self): end if #:endif """ -] + ] instring += [ -""" + """ if (.not. (${cond}$)) then #:for element in list print *, "Element is in list!" @@ -553,10 +618,10 @@ def test_fypp(self): error stop end if """ -] + ] outstring += [ -""" + """ if (.not. (${cond}$)) then #:for element in list print *, "Element is in list!" @@ -564,10 +629,10 @@ def test_fypp(self): error stop end if """ -] + ] instring += [ -""" + """ #:if aa > 1 print *, "Number is more than 1" if (condition) then @@ -577,10 +642,10 @@ def test_fypp(self): end if #:endif """ -] + ] outstring += [ -""" + """ #:if aa > 1 print *, "Number is more than 1" if (condition) then @@ -590,71 +655,70 @@ def test_fypp(self): end if #:endif """ -] + ] instring += [ -""" + """ #:def DEBUG_CODE( code) #:if DEBUG > 0 $:code #:endif #:enddef DEBUG_CODE """ -] + ] outstring += [ -""" + """ #:def DEBUG_CODE( code) #:if DEBUG > 0 $:code #:endif #:enddef DEBUG_CODE """ -] - + ] instring += [ -""" + """ #:block DEBUG_CODE if (a 0 print *, "hola" if (.not. (${cond}$)) then @@ -665,10 +729,10 @@ def test_fypp(self): end if #:endif """ -] + ] outstring += [ -""" + """ #:if DEBUG > 0 print *, "hola" if (.not. (${cond}$)) then @@ -679,10 +743,10 @@ def test_fypp(self): end if #:endif """ -] + ] instring += [ -""" + """ program try #:def mydef a = & @@ -695,10 +759,10 @@ def test_fypp(self): #:enddef end program """ -] + ] outstring += [ -""" + """ program try #:def mydef a = & @@ -711,10 +775,10 @@ def test_fypp(self): #:enddef end program """ -] + ] instring += [ -""" + """ #:if worktype ${worktype}$, & #:else @@ -723,10 +787,10 @@ def test_fypp(self): DIMENSION(${arr_exp}$), & POINTER :: work """ -] + ] outstring += [ -""" + """ #:if worktype ${worktype}$, & #:else @@ -735,9 +799,7 @@ def test_fypp(self): DIMENSION(${arr_exp}$), & POINTER :: work """ -] - - + ] for instr, outstr in zip(instring, outstring): self.assert_fprettify_result([], instr, outstr) @@ -756,8 +818,12 @@ def test_mod(self): self.assert_fprettify_result([], instring_mod, outstring_mod) self.assert_fprettify_result([], instring_prog, outstring_prog) - self.assert_fprettify_result(['--disable-indent-mod'], instring_mod, outstring_mod_disable) - self.assert_fprettify_result(['--disable-indent-mod'], instring_prog, outstring_prog_disable) + self.assert_fprettify_result( + ["--disable-indent-mod"], instring_mod, outstring_mod_disable + ) + self.assert_fprettify_result( + ["--disable-indent-mod"], instring_prog, outstring_prog_disable + ) def test_decl(self): """test formatting of declarations""" @@ -769,9 +835,11 @@ def test_decl(self): self.assert_fprettify_result([], instring_1, instring_1) self.assert_fprettify_result([], instring_2, instring_2) - self.assert_fprettify_result(['--enable-decl'], instring_1, outstring_1) - self.assert_fprettify_result(['--enable-decl'], instring_2, outstring_2) - self.assert_fprettify_result(['--enable-decl', '--whitespace-decl=0'], instring_2, outstring_2_min) + self.assert_fprettify_result(["--enable-decl"], instring_1, outstring_1) + self.assert_fprettify_result(["--enable-decl"], instring_2, outstring_2) + self.assert_fprettify_result( + ["--enable-decl", "--whitespace-decl=0"], instring_2, outstring_2_min + ) def test_statement_label(self): instring = "1003 FORMAT(2(1x, i4), 5x, '-', 5x, '-', 3x, '-', 5x, '-', 5x, '-', 8x, '-', 3x, &\n 1p, 2(1x, d10.3))" @@ -788,7 +856,7 @@ def test_multiline_str(self): outstring = [] instring += [ -''' + """ CHARACTER(len=*), PARAMETER :: serialized_string = & "qtb_rng_gaussian 1 F T F 0.0000000000000000E+00& 12.0 12.0 12.0& @@ -797,11 +865,11 @@ def test_multiline_str(self): 12.0 12.0 12.0& 12.0 12.0 12.0& 12.0 12.0 12.0" -''' -] +""" + ] outstring += [ -''' + """ CHARACTER(len=*), PARAMETER :: serialized_string = & "qtb_rng_gaussian 1 F T F 0.0000000000000000E+00& & 12.0 12.0 12.0& @@ -810,11 +878,11 @@ def test_multiline_str(self): & 12.0 12.0 12.0& & 12.0 12.0 12.0& & 12.0 12.0 12.0" -''' -] +""" + ] instring += [ -''' + """ CHARACTER(len=*), PARAMETER :: serialized_string = & "qtb_rng_gaussian 1 F T F 0.0000000000000000E+00& & 12.0 12.0 12.0& @@ -823,11 +891,11 @@ def test_multiline_str(self): & 12.0 12.0 12.0& & 12.0 12.0 12.0& & 12.0 12.0 12.0" -''' -] +""" + ] outstring += [ -''' + """ CHARACTER(len=*), PARAMETER :: serialized_string = & "qtb_rng_gaussian 1 F T F 0.0000000000000000E+00& & 12.0 12.0 12.0& @@ -836,15 +904,14 @@ def test_multiline_str(self): & 12.0 12.0 12.0& & 12.0 12.0 12.0& & 12.0 12.0 12.0" -''' -] +""" + ] for instr, outstr in zip(instring, outstring): self.assert_fprettify_result([], instr, outstr) def test_label(self): - instring = \ -""" + instring = """ MODULE cp_lbfgs CONTAINS 20000 FORMAT('RUNNING THE L-BFGS-B CODE', /, /, & @@ -862,8 +929,7 @@ def test_label(self): END MODULE """ - outstring = \ -""" + outstring = """ MODULE cp_lbfgs CONTAINS 20000 FORMAT('RUNNING THE L-BFGS-B CODE', /, /, & @@ -884,7 +950,6 @@ def test_label(self): self.assert_fprettify_result([], instring, outstring) - def addtestmethod(testcase, fpath, ffile): """add a test method for each example.""" @@ -905,29 +970,29 @@ def testmethod(testcase): def test_result(path, info): return [os.path.relpath(path, BEFORE_DIR), info] - with io.open(example_before, 'r', encoding='utf-8') as infile: + with io.open(example_before, "r", encoding="utf-8") as infile: outstring = io.StringIO() try: - fprettify.reformat_ffile(infile, outstring) + reformat_ffile(infile, outstring) m = hashlib.sha256() - m.update(outstring.getvalue().encode('utf-8')) + m.update(outstring.getvalue().encode("utf-8")) test_info = "checksum" test_content = test_result(example_before, m.hexdigest()) - with io.open(example_after, 'w', encoding='utf-8') as outfile: + with io.open(example_after, "w", encoding="utf-8") as outfile: outfile.write(outstring.getvalue()) FPrettifyTestCase.n_success += 1 except FprettifyParseException as e: test_info = "parse error" - fprettify.log_exception(e, test_info) + log_exception(e, test_info) test_content = test_result(example_before, test_info) FPrettifyTestCase.n_parsefail += 1 except FprettifyInternalException as e: test_info = "internal error" - fprettify.log_exception(e, test_info) + log_exception(e, test_info) test_content = test_result(example_before, test_info) FPrettifyTestCase.n_internalfail += 1 except: # pragma: no cover @@ -936,38 +1001,52 @@ def test_result(path, info): after_exists = os.path.isfile(example_after) if after_exists: - with io.open(example_before, 'r', encoding='utf-8') as infile: + with io.open(example_before, "r", encoding="utf-8") as infile: before_content = infile.read() before_nosp = re.sub( - r'\n{3,}', r'\n\n', before_content.lower().replace(' ', '').replace('\t', '')) + r"\n{3,}", + r"\n\n", + before_content.lower().replace(" ", "").replace("\t", ""), + ) - with io.open(example_after, 'r', encoding='utf-8') as outfile: + with io.open(example_after, "r", encoding="utf-8") as outfile: after_content = outfile.read() - after_nosp = after_content.lower().replace(' ', '') + after_nosp = after_content.lower().replace(" ", "") testcase.assertMultiLineEqual(before_nosp, after_nosp) - sep_str = ' : ' - with io.open(RESULT_FILE, 'r', encoding='utf-8') as infile: + sep_str = " : " + with io.open(RESULT_FILE, "r", encoding="utf-8") as infile: found = False for line in infile: line_content = line.strip().split(sep_str) if line_content[0] == test_content[0]: found = True eprint(test_info, end=" ") - msg = '{} (old) != {} (new)'.format( - line_content[1], test_content[1]) - if test_info == "checksum" and after_exists and after_content.count('\n') < 10000: + msg = "{} (old) != {} (new)".format( + line_content[1], test_content[1] + ) + if ( + test_info == "checksum" + and after_exists + and after_content.count("\n") < 10000 + ): # difflib can not handle large files - result = list(difflib.unified_diff(before_content.splitlines( - True), after_content.splitlines(True), fromfile=test_content[0], tofile=line_content[0])) - msg += '\n' + ''.join(result) + result = list( + difflib.unified_diff( + before_content.splitlines(True), + after_content.splitlines(True), + fromfile=test_content[0], + tofile=line_content[0], + ) + ) + msg += "\n" + "".join(result) try: - testcase.assertEqual( - line_content[1], test_content[1], msg) + testcase.assertEqual(line_content[1], test_content[1], msg) except AssertionError: # pragma: no cover FPrettifyTestCase.write_result( - FAILED_FILE, test_content, sep_str) + FAILED_FILE, test_content, sep_str + ) raise break @@ -977,10 +1056,11 @@ def test_result(path, info): # not sure why this even works, using "test something" (with a space) as function name... # however it gives optimal test output - testmethod.__name__ = ("test " + joinpath(fpath, ffile)) + testmethod.__name__ = "test " + joinpath(fpath, ffile) setattr(testcase, testmethod.__name__, testmethod) + # make sure all directories exist if not os.path.exists(BEFORE_DIR): # pragma: no cover os.makedirs(BEFORE_DIR) @@ -989,13 +1069,15 @@ def test_result(path, info): if not os.path.exists(RESULT_DIR): # pragma: no cover os.makedirs(RESULT_DIR) if not os.path.exists(RESULT_FILE): # pragma: no cover - io.open(RESULT_FILE, 'w', encoding='utf-8').close() + io.open(RESULT_FILE, "w", encoding="utf-8").close() if os.path.exists(FAILED_FILE): # pragma: no cover # erase failures from previous testers - io.open(FAILED_FILE, 'w', encoding='utf-8').close() + io.open(FAILED_FILE, "w", encoding="utf-8").close() # this prepares FPrettifyTestCase class when module is loaded by unittest for dirpath, _, filenames in os.walk(BEFORE_DIR): - for example in [f for f in filenames if any(f.endswith(_) for _ in fprettify.FORTRAN_EXTENSIONS)]: + for example in [ + f for f in filenames if any(f.endswith(_) for _ in FORTRAN_EXTENSIONS) + ]: rel_dirpath = os.path.relpath(dirpath, start=BEFORE_DIR) addtestmethod(FPrettifyTestCase, rel_dirpath, example) diff --git a/fprettify/utils.py b/fprettify/utils.py new file mode 100644 index 0000000..4a6d44b --- /dev/null +++ b/fprettify/utils.py @@ -0,0 +1,372 @@ +import errno +import logging +import os +import sys +from difflib import unified_diff +from itertools import chain +from os import PathLike +from pathlib import Path +from typing import Iterable, List, Optional, Union + +import configargparse as argparse + +from fprettify.constants import FORTRAN_EXTENSIONS + + +def build_ws_dict(args): + # todo: do we need this? + ws_dict = {} + ws_dict["comma"] = args.whitespace_comma + ws_dict["assignments"] = args.whitespace_assignment + ws_dict["decl"] = args.whitespace_decl + ws_dict["relational"] = args.whitespace_relational + ws_dict["logical"] = args.whitespace_logical + ws_dict["plusminus"] = args.whitespace_plusminus + ws_dict["multdiv"] = args.whitespace_multdiv + ws_dict["print"] = args.whitespace_print + ws_dict["type"] = args.whitespace_type + ws_dict["intrinsics"] = args.whitespace_intrinsics + return ws_dict + + +def get_parser_args(): + return { + "description": "Auto-format modern Fortran source files. Config files ('.fprettify.rc') in the home (~) directory and any such files located in parent directories of the input file will be used. When the standard input is used, the search is started from the current directory.", + "formatter_class": argparse.ArgumentDefaultsHelpFormatter, + "args_for_setting_config_path": ["-c", "--config-file"], + } + + +def get_arg_parser(args=None): + """Create the argument parse for the command line interface.""" + parser = argparse.ArgumentParser(**args) + + parser.add_argument( + "-i", "--indent", type=int, default=3, help="relative indentation width" + ) + parser.add_argument( + "-l", + "--line-length", + type=int, + default=132, + help="column after which a line should end, viz. -ffree-line-length-n for GCC", + ) + parser.add_argument( + "-w", + "--whitespace", + type=int, + choices=range(0, 5), + default=2, + help="Presets for the amount of whitespace - " + " 0: minimal whitespace" + " | 1: operators (except arithmetic), print/read" + " | 2: operators, print/read, plus/minus" + " | 3: operators, print/read, plus/minus, muliply/divide" + " | 4: operators, print/read, plus/minus, muliply/divide, type component selector", + ) + parser.add_argument( + "--whitespace-comma", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for comma/semicolons", + ) + parser.add_argument( + "--whitespace-assignment", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for assignments", + ) + parser.add_argument( + "--whitespace-decl", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for declarations (requires '--enable-decl')", + ) + parser.add_argument( + "--whitespace-relational", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for relational operators", + ) + parser.add_argument( + "--whitespace-logical", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for logical operators", + ) + parser.add_argument( + "--whitespace-plusminus", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for plus/minus arithmetic", + ) + parser.add_argument( + "--whitespace-multdiv", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for multiply/divide arithmetic", + ) + parser.add_argument( + "--whitespace-print", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for print/read statements", + ) + parser.add_argument( + "--whitespace-type", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for select type components", + ) + parser.add_argument( + "--whitespace-intrinsics", + type=str2bool, + nargs="?", + default="None", + const=True, + help="boolean, en-/disable whitespace for intrinsics like if/write/close", + ) + parser.add_argument( + "--strict-indent", + action="store_true", + default=False, + help="strictly impose indentation even for nested loops", + ) + parser.add_argument( + "--enable-decl", + action="store_true", + default=False, + help="enable whitespace formatting of declarations ('::' operator).", + ) + parser.add_argument( + "--disable-indent", + action="store_true", + default=False, + help="don't impose indentation", + ) + parser.add_argument( + "--disable-whitespace", + action="store_true", + default=False, + help="don't impose whitespace formatting", + ) + parser.add_argument( + "--enable-replacements", + action="store_true", + default=False, + help="replace relational operators (e.g. '.lt.' <--> '<')", + ) + parser.add_argument( + "--c-relations", + action="store_true", + default=False, + help="C-style relational operators ('<', '<=', ...)", + ) + parser.add_argument( + "--case", + nargs=4, + default=[0, 0, 0, 0], + type=int, + help="Enable letter case formatting of intrinsics by specifying which of " + "keywords, procedures/modules, operators and constants (in this order) should be lowercased or uppercased - " + " 0: do nothing" + " | 1: lowercase" + " | 2: uppercase", + ) + + parser.add_argument( + "--strip-comments", + action="store_true", + default=False, + help="strip whitespaces before comments", + ) + parser.add_argument( + "--disable-fypp", + action="store_true", + default=False, + help="Disables the indentation of fypp preprocessor blocks.", + ) + parser.add_argument( + "--disable-indent-mod", + action="store_true", + default=False, + help="Disables the indentation after module / program.", + ) + + parser.add_argument( + "-d", + "--diff", + action="store_true", + default=False, + help="Write file differences to stdout instead of formatting inplace", + ) + parser.add_argument( + "-s", + "--stdout", + action="store_true", + default=False, + help="Write to stdout instead of formatting inplace", + ) + + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-S", + "--silent", + "--no-report-errors", + action="store_true", + default=False, + help="Don't write any errors or warnings to stderr", + ) + group.add_argument( + "-D", "--debug", action="store_true", default=False, help=argparse.SUPPRESS + ) + parser.add_argument( + "path", + type=str, + nargs="*", + help="Paths to files to be formatted inplace. If no paths are given, stdin (-) is used by default. Path can be a directory if --recursive is used.", + default=["-"], + ) + parser.add_argument( + "-r", + "--recursive", + action="store_true", + default=False, + help="Recursively auto-format all Fortran files in subdirectories of specified path; recognized filename extensions: {}".format( + ", ".join(FORTRAN_EXTENSIONS) + ), + ) + parser.add_argument( + "-e", + "--exclude", + action="append", + default=[], + type=str, + help="File or directory patterns to be excluded when searching for Fortran files to format", + ) + parser.add_argument( + "-f", + "--fortran", + type=str, + action="append", + default=[], + help="Overrides default fortran extensions recognized by --recursive. Repeat this option to specify more than one extension.", + ) + parser.add_argument("--version", action="version", version="%(prog)s 0.3.7") + return parser + + +def get_config_files(path: Union[str, PathLike]) -> List[Path]: + """Find configuration files in or above the given path.""" + files = [] + path = Path(path).expanduser().absolute() + if not path.exists(): + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(path)) + parent = path if path.is_dir() else path.parent + while True: + file = parent / ".fprettify.rc" + if file.is_file(): + files.insert(0, file) + parent = parent.parent + if parent == parent: + break + parent = parent + return files + + +def glob(path, recursive=False, *patterns) -> Iterable[Path]: + """ + Find pathnames under the given path, optionally recursively, + matching any of the given patterns. + """ + if recursive: + return iter(chain.from_iterable(path.rglob(p) for p in patterns)) + else: + return iter(chain.from_iterable(path.glob(p) for p in patterns)) + + +def prune_excluded( + files: Iterable[Path], exclude: Optional[Iterable[Union[str, PathLike]]] +) -> Iterable[Path]: + if any(exclude): + exclude = [Path(e).expanduser().absolute() for e in exclude] + else: + exclude = [] + for f in files: + if any(f.samefile(e) or f.is_relative_to(e) for e in exclude): + continue + yield f + + +def set_logger(level): + """setup custom logger""" + logger = logging.getLogger("fprettify-logger") + logger.setLevel(level) + stream_handler = logging.StreamHandler() + stream_handler.setLevel(level) + formatter = logging.Formatter( + "%(levelname)s: File %(ffilename)s, line %(fline)s\n %(message)s" + ) + stream_handler.setFormatter(formatter) + logger.addHandler(stream_handler) + + +def show_diff(buffer, fname): + i = open(fname).readlines() + o = buffer.readlines() + print("".join(unified_diff(i, o, fname, fname, n=5)), file=sys.stdout) + + +def diff(a, b, a_name, b_name): + # type: (str, str, str, str) -> str + + """Return a unified diff string between strings `a` and `b`.""" + import difflib + + a_lines = [line + "\n" for line in a.splitlines()] + b_lines = [line + "\n" for line in b.splitlines()] + return "".join( + difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5) + ) + + +def str2bool(str) -> Optional[bool]: + """Convert the given string to a boolean.""" + if str.lower() in ("yes", "true", "t", "y", "1"): + return True + elif str.lower() in ("no", "false", "f", "n", "0"): + return False + else: + return None + + +def log_message(message, level, filename, line_nr): + """log a message""" + + logger = logging.getLogger("fprettify-logger") + logger_d = {"ffilename": filename, "fline": line_nr} + logger_to_use = getattr(logger, level) + logger_to_use(message, extra=logger_d) + + +def log_exception(e, message): + """log an exception and a message""" + log_message(message, "exception", e.filename, e.line_nr) From 4450d4aefd4cac2ecbfadafc9953835777481406 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Sun, 10 Mar 2024 08:44:52 -0400 Subject: [PATCH 2/2] group fmt fns by name/purpose, consistent fn names --- fprettify/__init__.py | 74 +- fprettify/formatter.py | 1443 ++++++++++++++++++----------------- fprettify/parser.py | 112 ++- fprettify/preprocessor.py | 73 -- fprettify/tests/__init__.py | 4 +- setup.cfg | 2 +- 6 files changed, 838 insertions(+), 870 deletions(-) delete mode 100644 fprettify/preprocessor.py diff --git a/fprettify/__init__.py b/fprettify/__init__.py index 8812d1e..df1452a 100644 --- a/fprettify/__init__.py +++ b/fprettify/__init__.py @@ -72,7 +72,7 @@ from fprettify.constants import * from fprettify.exceptions import * -from fprettify.formatter import reformat_inplace +from fprettify.formatter import reformat from fprettify.utils import ( build_ws_dict, get_arg_parser, @@ -119,16 +119,12 @@ def run(argv=sys.argv): # pragma: no cover if not args.recursive: filenames = [directory] else: - if args.fortran: - ext = args.fortran - else: - ext = FORTRAN_EXTENSIONS + ext = args.fortran if args.fortran else FORTRAN_EXTENSIONS filenames = [] from fnmatch import fnmatch for dirpath, dirnames, files in os.walk(directory, topdown=True): - # Prune excluded patterns from list of child directories dirnames[:] = [ dirname @@ -156,7 +152,6 @@ def run(argv=sys.argv): # pragma: no cover filenames.append(ffile) for filename in filenames: - # reparse arguments using the file's list of config files filearguments = parser_args filearguments["default_config_files"] = ["~/.fprettify.rc"] + list( @@ -167,7 +162,8 @@ def run(argv=sys.argv): # pragma: no cover file_argparser = get_arg_parser(filearguments) file_args = file_argparser.parse_args(argv[1:]) ws_dict = build_ws_dict(file_args) - + stdout = file_args.stdout or directory == "-" + diffonly = file_args.diff case_dict = { "keywords": file_args.case[0], "procedures": file_args.case[1], @@ -175,23 +171,28 @@ def run(argv=sys.argv): # pragma: no cover "constants": file_args.case[3], } - stdout = file_args.stdout or directory == "-" - diffonly = file_args.diff - - if file_args.debug: - level = logging.DEBUG - elif args.silent: - level = logging.CRITICAL - else: - level = logging.WARNING - - set_logger(level) + set_logger( + logging.DEBUG + if file_args.debug + else logging.CRITICAL if args.silent else logging.WARNING + ) try: - reformat_inplace( - filename, - stdout=stdout, - diffonly=diffonly, + # transfer input stream or file to an in-memory buffer + if filename == "-": + ib = io.StringIO() + ib.write(sys.stdin.read()) + else: + ib = io.open(filename, "r", encoding="utf-8") + + # buffer for reformatted output + ob = io.StringIO() + + # reformat the input buffer + reformat( + ib, + ob, + orig_filename=filename, impose_indent=not file_args.disable_indent, indent_size=file_args.indent, strict_indent=file_args.strict_indent, @@ -209,6 +210,33 @@ def run(argv=sys.argv): # pragma: no cover indent_fypp=not file_args.disable_fypp, indent_mod=not file_args.disable_indent_mod, ) + + # if in diff mode, just write the diff to stdout + if diffonly: + from fprettify.utils import diff + + ib.seek(0) + ob.seek(0) + sys.stdout.write(diff(ib.read(), ob.read(), filename, filename)) + else: + # otherwise write reformatted content to the selected output + if stdout: + sys.stdout.write(ob.getvalue()) + return + + # write to output file only if content has changed + import hashlib + + output = ob.getvalue() + hash_new = hashlib.md5() + hash_new.update(output.encode("utf-8")) + hash_old = hashlib.md5() + with io.open(filename, "r", encoding="utf-8") as f: + hash_old.update(f.read().encode("utf-8")) + if hash_new.digest() == hash_old.digest(): + return + with io.open(filename, "w", encoding="utf-8") as f: + f.write(output) except FprettifyException as e: log_exception(e, "Fatal error occured") sys.exit(1) diff --git a/fprettify/formatter.py b/fprettify/formatter.py index d790982..9773dff 100644 --- a/fprettify/formatter.py +++ b/fprettify/formatter.py @@ -1,402 +1,214 @@ import io -import sys from fprettify.constants import * from fprettify.exceptions import FprettifyInternalException from fprettify.indenter import F90Indenter from fprettify.parser import * -from fprettify.preprocessor import preprocess_labels, preprocess_line, preprocess_omp -from fprettify.utils import diff, log_message +from fprettify.utils import log_message -def inspect_ffile_format( - infile, indent_size, strict_indent, indent_fypp=False, orig_filename=None -): - """ - Determine indentation by inspecting original Fortran file. +def get_linebreak_position(lines, filter_fypp=True): + """extract linebreak positions in Fortran line from lines""" + linebreak_pos = [] + if filter_fypp: + notfortran_re = NOTFORTRAN_LINE_RE + else: + notfortran_re = NOTFORTRAN_FYPP_LINE_RE - This is mainly for finding aligned blocks of DO/IF statements. - Also check if it has f77 constructs. - :param infile: open file - :param indent_size: the default indent size - :orig_filename: filename used for messages - :returns: [ target indent sizes for each line, - indent of first line (offset) ] - """ - if not orig_filename: - orig_filename = infile.name + for line in lines: + found = None + for char_pos, _ in CharFilter(line, filter_strings=False): + if re.match(LINEBREAK_STR, line[char_pos:], RE_FLAGS): + found = char_pos + if found: + linebreak_pos.append(found) + elif notfortran_re.search(line.lstrip(" ")): + linebreak_pos.append(0) - indents = [] - stream = InputStream( - infile, filter_fypp=not indent_fypp, orig_filename=orig_filename - ) - prev_offset = 0 - first_indent = -1 - has_fypp = False + linebreak_pos = [ + sum(linebreak_pos[0 : _ + 1]) - 1 for _ in range(0, len(linebreak_pos)) + ] - while 1: - f_line, _, lines = stream.next_fortran_line() - if not lines: - break + return linebreak_pos - if FYPP_LINE_RE.search(f_line): - has_fypp = True - f_line, lines, _ = preprocess_labels(f_line, lines) +def get_manual_alignment(lines): + """extract manual indents for line continuations from line""" + manual_lines_indent = [len(l) - len(l.lstrip(" ").lstrip("&")) for l in lines] + manual_lines_indent = [ind - manual_lines_indent[0] for ind in manual_lines_indent] + return manual_lines_indent - offset = len(lines[0]) - len(lines[0].lstrip(" ")) - if f_line.strip() and first_indent == -1: - first_indent = offset - indents.append(offset - prev_offset) - # don't impose indentation for blocked do/if constructs: - if IF_RE.search(f_line) or DO_RE.search(f_line): - if prev_offset != offset or strict_indent: - indents[-1] = indent_size - else: - indents[-1] = indent_size +def preprocess_omp(f_line, lines): + """convert omp conditional to normal fortran""" - prev_offset = offset + is_omp_conditional = bool(OMP_COND_RE.search(f_line)) + if is_omp_conditional: + f_line = OMP_COND_RE.sub(" ", f_line, count=1) + lines = [OMP_COND_RE.sub(" ", l, count=1) for l in lines] - return indents, first_indent, has_fypp + return [f_line, lines, is_omp_conditional] -def replace_relational_single_fline(f_line, cstyle): - """ - format a single Fortran line - replaces scalar relational - operators in logical expressions to either Fortran or C-style. - .lt. <--> < - .le. <--> <= - .gt. <--> > - .ge. <--> >= - .eq. <--> == - .ne. <--> /= - """ +def preprocess_labels(f_line, lines): + """remove statement labels""" - new_line = f_line + match = STATEMENT_LABEL_RE.search(f_line) + if match: + label = match.group(1) + else: + label = "" - # only act on lines that do contain a relation - if REL_OP_RE.search(f_line): - # check that relation is not inside quotes, a string, or commented - # (think of underlining a heading with === or things like markup being printed which we do not replace) - pos_prev = -1 - pos = -1 - line_parts = [""] - for pos, char in CharFilter(f_line): - if pos > pos_prev + 1: # skipped string - line_parts.append(f_line[pos_prev + 1 : pos].strip()) # append string - line_parts.append("") + if label: + f_line = STATEMENT_LABEL_RE.sub(len(label) * " ", f_line, count=1) + lines[0] = STATEMENT_LABEL_RE.sub(len(label) * " ", lines[0], count=1) - line_parts[-1] += char + return [f_line, lines, label] - pos_prev = pos - if pos + 1 < len(f_line): - line_parts.append(f_line[pos + 1 :]) +def preprocess_line(f_line, lines, comments, filename, line_nr, indent_fypp): + """preprocess lines: identification and formatting of special cases""" + is_blank = False + prev_indent = False + do_format = False - for pos, part in enumerate(line_parts): - # exclude comments, strings: - if not STR_OPEN_RE.match(part): - # also exclude / if we see a namelist and data statement - if cstyle: - part = re.sub(r"\.LT\.", "< ", part, flags=RE_FLAGS) - part = re.sub(r"\.LE\.", "<= ", part, flags=RE_FLAGS) - part = re.sub(r"\.GT\.", "> ", part, flags=RE_FLAGS) - part = re.sub(r"\.GE\.", ">= ", part, flags=RE_FLAGS) - part = re.sub(r"\.EQ\.", "== ", part, flags=RE_FLAGS) - part = re.sub(r"\.NE\.", "/= ", part, flags=RE_FLAGS) - else: - part = re.sub(r"<=", ".le.", part, flags=RE_FLAGS) - part = re.sub(r"<", ".lt.", part, flags=RE_FLAGS) - part = re.sub(r">=", ".ge.", part, flags=RE_FLAGS) - part = re.sub(r">", ".gt.", part, flags=RE_FLAGS) - part = re.sub(r"==", ".eq.", part, flags=RE_FLAGS) - part = re.sub(r"\/=", ".ne.", part, flags=RE_FLAGS) + # is_special: special directives that should not be treated as Fortran + # currently supported: fypp preprocessor directives or comments for FORD documentation + is_special = [False] * len(lines) - line_parts[pos] = part + for pos, line in enumerate(lines): + line_strip = line.lstrip() + if indent_fypp: + is_special[pos] = line_strip.startswith("!!") or ( + FYPP_LINE_RE.search(line_strip) if pos > 0 else False + ) + else: + is_special[pos] = FYPP_LINE_RE.search(line_strip) or line_strip.startswith( + "!!" + ) - new_line = "".join(line_parts) + # if first line is special, all lines should be special + if is_special[0]: + is_special = [True] * len(lines) - return new_line + if EMPTY_RE.search(f_line): # empty lines including comment lines + if any(comments): + if lines[0].startswith(" ") and not OMP_DIR_RE.search(lines[0]): + # indent comment lines only if they were not indented before. + prev_indent = True + else: + is_blank = True + lines = [l.strip(" ") if not is_special[n] else l for n, l in enumerate(lines)] + else: + do_format = True + return [lines, do_format, prev_indent, is_blank, is_special] -def replace_keywords_single_fline(f_line, case_dict): - """ - format a single Fortran line - change case of keywords - """ - new_line = f_line +def pass_defaults_to_next_line(f_line): + """defaults to be transferred from f_line to next line""" + if re.search(r";\s*$", f_line, RE_FLAGS): + # if line ended with semicolon, don't indent next line + do_indent = False + use_same_line = True + else: + do_indent = True + use_same_line = False - # Collect words list - pos_prev = -1 - pos = -1 - line_parts = [""] - for pos, char in CharFilter(f_line): - if pos > pos_prev + 1: # skipped string - line_parts.append(f_line[pos_prev + 1 : pos].strip()) # append string - line_parts.append("") + return [do_indent, use_same_line] - line_parts[-1] += char - pos_prev = pos +def add_whitespace_charwise(line, spacey, scope_parser, format_decl, filename, line_nr): + """add whitespace character wise (no need for context aware parsing)""" + line_ftd = line + pos_eq = [] + end_of_delim = -1 + level = 0 + for pos, char in CharFilter(line): + # offset w.r.t. unformatted line + offset = len(line_ftd) - len(line) - if pos + 1 < len(f_line): - line_parts.append(f_line[pos + 1 :]) + # format delimiters + what_del_open = None + what_del_close = None + if pos > end_of_delim: + [what_del_open, what_del_close] = get_curr_delim(line, pos) - line_parts = [ - [a] if STR_OPEN_RE.match(a) else re.split(F90_OPERATORS_RE, a) - for a in line_parts - ] # problem, split "." - line_parts = [b for a in line_parts for b in a] + if what_del_open or what_del_close: + sep1 = 0 + sep2 = 0 - ## line_parts = [[a] if STR_OPEN_RE.match(a) else re.split('(\W)',a) - ## for a in line_parts] # problem, split "." - line_parts = [ - [a] if STR_OPEN_RE.match(a) else re.split("([^a-zA-Z0-9_.])", a) - for a in line_parts - ] - line_parts = [b for a in line_parts for b in a] + if what_del_open: + delim = what_del_open.group() + else: + delim = what_del_close.group() - swapcase = lambda s, a: s if a == 0 else (s.lower() if a == 1 else s.upper()) + lhs = line_ftd[: pos + offset] + rhs = line_ftd[pos + len(delim) + offset :] - nbparts = len(line_parts) - for pos, part in enumerate(line_parts): - # exclude comments, strings: - if part.strip() and not STR_OPEN_RE.match(part): - if F90_KEYWORDS_RE.match(part): - part = swapcase(part, case_dict["keywords"]) - elif F90_MODULES_RE.match(part): - part = swapcase(part, case_dict["procedures"]) - elif F90_PROCEDURES_RE.match(part): - ok = False - for pos2 in range(pos + 1, nbparts): - part2 = line_parts[pos2] - if part2.strip() and not ( - part2 == "\n" or STR_OPEN_RE.match(part2) - ): - ok = part2 == "(" - break - if ok: - part = swapcase(part, case_dict["procedures"]) - elif F90_OPERATORS_RE.match(part): - part = swapcase(part, case_dict["operators"]) - elif F90_CONSTANTS_RE.match(part): - part = swapcase(part, case_dict["constants"]) - elif F90_CONSTANTS_TYPES_RE.match(part): - part = swapcase(part, case_dict["constants"]) - elif F90_NUMBER_ALL_REC.match(part): - part = swapcase(part, case_dict["constants"]) + # format opening delimiters + if what_del_open: + level += 1 # new scope + # add separating whitespace before opening delimiter + # with some exceptions: + # FIXME: duplication of regex, better to include them into + # INTR_STMTS_PAR + if ( + ( + not re.search( + (r"(" + DEL_OPEN_STR + r"|[\w\*/=\+\-:])\s*$"), + line[:pos], + RE_FLAGS, + ) + and not EMPTY_RE.search(line[:pos]) + ) + or re.search( + SOL_STR + r"(\w+\s*:)?(ELSE)?\s*IF\s*$", line[:pos], RE_FLAGS + ) + or re.search( + SOL_STR + r"(\w+\s*:)?\s*DO\s+WHILE\s*$", line[:pos], RE_FLAGS + ) + or re.search( + SOL_STR + r"(SELECT)?\s*CASE\s*$", line[:pos], RE_FLAGS + ) + or re.search( + SOL_STR + r"(SELECT)?\s*RANK\s*$", line[:pos], RE_FLAGS + ) + or re.search(SOL_STR + r"SELECT\s*TYPE\s*$", line[:pos], RE_FLAGS) + or re.search(SOL_STR + r"CLASS\s*DEFAULT\s*$", line[:pos], RE_FLAGS) + or re.search( + SOL_STR + r"(TYPE|CLASS)\s+IS\s*$", line[:pos], RE_FLAGS + ) + or re.search( + r"(? 0: + level += -1 # close scope + else: + log_message( + "unpaired bracket delimiters", "info", filename, line_nr + ) - new_line = "".join(line_parts) + # add separating whitespace after closing delimiter + # with some exceptions: + if not re.search( + r"^\s*(" + DEL_CLOSE_STR + r"|[,%:/\*])", line[pos + 1 :], RE_FLAGS + ): + sep2 = 1 + elif re.search(r"^\s*::", line[pos + 1 :], RE_FLAGS): + sep2 = len(rhs) - len(rhs.lstrip(" ")) if not format_decl else 1 - return new_line + # where delimiter token ends + end_of_delim = pos + len(delim) - 1 - -def format_single_fline( - f_line, - whitespace, - whitespace_dict, - linebreak_pos, - ampersand_sep, - scope_parser, - format_decl, - filename, - line_nr, - auto_format=True, -): - """ - format a single Fortran line - imposes white space formatting - and inserts linebreaks. - Takes a logical Fortran line `f_line` as input as well as the positions - of the linebreaks (`linebreak_pos`), and the number of - separating whitespace characters before ampersand (`ampersand_sep`). - `filename` and `line_nr` just for error messages. - The higher `whitespace`, the more white space characters inserted - - whitespace = 0, 1, 2, 3 are currently supported. - whitespace formatting can additionally controlled more fine-grained - via a dictionary of bools (whitespace_dict) - auto formatting can be turned off by setting `auto_format` to False. - """ - - # define whether to put whitespaces around operators: - mapping = { - "comma": 0, # 0: comma, semicolon - "assignments": 1, # 1: assignment operators - "relational": 2, # 2: relational operators - "logical": 3, # 3: logical operators - "plusminus": 4, # 4: arithm. operators plus and minus - "multdiv": 5, # 5: arithm. operators multiply and divide - "print": 6, # 6: print / read statements - "type": 7, # 7: select type components - "intrinsics": 8, # 8: intrinsics - "decl": 9, # 9: declarations - } - - if whitespace == 0: - spacey = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - elif whitespace == 1: - spacey = [1, 1, 1, 1, 0, 0, 1, 0, 1, 1] - elif whitespace == 2: - spacey = [1, 1, 1, 1, 1, 0, 1, 0, 1, 1] - elif whitespace == 3: - spacey = [1, 1, 1, 1, 1, 1, 1, 0, 1, 1] - elif whitespace == 4: - spacey = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - else: - raise NotImplementedError("unknown value for whitespace") - - if whitespace_dict: - # iterate over dictionary and override settings for 'spacey' - for key, value in mapping.items(): - if whitespace_dict[key] == True: - spacey[value] = 1 - elif whitespace_dict[key] == False: - spacey[value] = 0 - - line = f_line - line_orig = line - - if auto_format: - - line = rm_extra_whitespace(line, format_decl) - line = add_whitespace_charwise( - line, spacey, scope_parser, format_decl, filename, line_nr - ) - line = add_whitespace_context(line, spacey) - - lines_out = split_reformatted_line( - line_orig, linebreak_pos, ampersand_sep, line, filename, line_nr - ) - return lines_out - - -def rm_extra_whitespace(line, format_decl): - """rm all unneeded whitespace chars, except for declarations""" - line_ftd = "" - pos_prev = -1 - pos = -1 - for pos, char in CharFilter(line): - if format_decl: - is_decl = False - else: - is_decl = line[pos:].lstrip().startswith("::") or line[ - :pos - ].rstrip().endswith("::") - - if pos > pos_prev + 1: # skipped string - line_ftd = line_ftd + line[pos_prev + 1 : pos] - - if char == " ": - # remove double spaces: - if line_ftd and (re.search(r"[\w]", line_ftd[-1]) or is_decl): - line_ftd = line_ftd + char - else: - if ( - line_ftd - and line_ftd[-1] == " " - and (not re.search(r"[\w]", char) and not is_decl) - ): - line_ftd = line_ftd[:-1] # remove spaces except between words - line_ftd = line_ftd + char - pos_prev = pos - - line_ftd = line_ftd + line[pos + 1 :] - return line_ftd - - -def add_whitespace_charwise(line, spacey, scope_parser, format_decl, filename, line_nr): - """add whitespace character wise (no need for context aware parsing)""" - line_ftd = line - pos_eq = [] - end_of_delim = -1 - level = 0 - for pos, char in CharFilter(line): - # offset w.r.t. unformatted line - offset = len(line_ftd) - len(line) - - # format delimiters - what_del_open = None - what_del_close = None - if pos > end_of_delim: - [what_del_open, what_del_close] = get_curr_delim(line, pos) - - if what_del_open or what_del_close: - sep1 = 0 - sep2 = 0 - - if what_del_open: - delim = what_del_open.group() - else: - delim = what_del_close.group() - - lhs = line_ftd[: pos + offset] - rhs = line_ftd[pos + len(delim) + offset :] - - # format opening delimiters - if what_del_open: - level += 1 # new scope - # add separating whitespace before opening delimiter - # with some exceptions: - # FIXME: duplication of regex, better to include them into - # INTR_STMTS_PAR - if ( - ( - not re.search( - (r"(" + DEL_OPEN_STR + r"|[\w\*/=\+\-:])\s*$"), - line[:pos], - RE_FLAGS, - ) - and not EMPTY_RE.search(line[:pos]) - ) - or re.search( - SOL_STR + r"(\w+\s*:)?(ELSE)?\s*IF\s*$", line[:pos], RE_FLAGS - ) - or re.search( - SOL_STR + r"(\w+\s*:)?\s*DO\s+WHILE\s*$", line[:pos], RE_FLAGS - ) - or re.search( - SOL_STR + r"(SELECT)?\s*CASE\s*$", line[:pos], RE_FLAGS - ) - or re.search( - SOL_STR + r"(SELECT)?\s*RANK\s*$", line[:pos], RE_FLAGS - ) - or re.search(SOL_STR + r"SELECT\s*TYPE\s*$", line[:pos], RE_FLAGS) - or re.search(SOL_STR + r"CLASS\s*DEFAULT\s*$", line[:pos], RE_FLAGS) - or re.search( - SOL_STR + r"(TYPE|CLASS)\s+IS\s*$", line[:pos], RE_FLAGS - ) - or re.search( - r"(? 0: - level += -1 # close scope - else: - log_message( - "unpaired bracket delimiters", "info", filename, line_nr - ) - - # add separating whitespace after closing delimiter - # with some exceptions: - if not re.search( - r"^\s*(" + DEL_CLOSE_STR + r"|[,%:/\*])", line[pos + 1 :], RE_FLAGS - ): - sep2 = 1 - elif re.search(r"^\s*::", line[pos + 1 :], RE_FLAGS): - sep2 = len(rhs) - len(rhs.lstrip(" ")) if not format_decl else 1 - - # where delimiter token ends - end_of_delim = pos + len(delim) - 1 - - line_ftd = ( - lhs.rstrip(" ") + " " * sep1 + delim + " " * sep2 + rhs.lstrip(" ") - ) + line_ftd = ( + lhs.rstrip(" ") + " " * sep1 + delim + " " * sep2 + rhs.lstrip(" ") + ) # format commas and semicolons if char in [",", ";"]: @@ -540,38 +352,128 @@ def add_whitespace_context(line, spacey): return line -def split_reformatted_line( - line_orig, linebreak_pos_orig, ampersand_sep, line, filename, line_nr -): - """ - Infer linebreak positions of formatted line from linebreak positions in - original line and split line. - """ - # shift line break positions from original to reformatted line - pos_new = 0 - pos_old = 0 - linebreak_pos_orig.sort(reverse=True) - linebreak_pos_ftd = [] - while 1: +def remove_extra_whitespace(line, format_decl): + """rm all unneeded whitespace chars, except for declarations""" + line_ftd = "" + pos_prev = -1 + pos = -1 + for pos, char in CharFilter(line): + if format_decl: + is_decl = False + else: + is_decl = line[pos:].lstrip().startswith("::") or line[ + :pos + ].rstrip().endswith("::") - if pos_new == len(line) or pos_old == len(line_orig): - break + if pos > pos_prev + 1: # skipped string + line_ftd = line_ftd + line[pos_prev + 1 : pos] - if line[pos_new] != line_orig[pos_old]: - raise FprettifyInternalException( - "failed at finding line break position", filename, line_nr - ) + if char == " ": + # remove double spaces: + if line_ftd and (re.search(r"[\w]", line_ftd[-1]) or is_decl): + line_ftd = line_ftd + char + else: + if ( + line_ftd + and line_ftd[-1] == " " + and (not re.search(r"[\w]", char) and not is_decl) + ): + line_ftd = line_ftd[:-1] # remove spaces except between words + line_ftd = line_ftd + char + pos_prev = pos - if linebreak_pos_orig and pos_old > linebreak_pos_orig[-1]: - linebreak_pos_orig.pop() - linebreak_pos_ftd.append(pos_new) - continue + line_ftd = line_ftd + line[pos + 1 :] + return line_ftd - pos_new += 1 - while pos_new < len(line) and line[pos_new] == " ": - pos_new += 1 - pos_old += 1 +def remove_trailing_whitespace(lines): + """remove trailing whitespaces from lines""" + lines = [re.sub(r"\s+$", "\n", l, RE_FLAGS) for l in lines] + return lines + + +def remove_leading_ampersands(lines, is_special, filename, line_nr): + """ + remove and return preceding ampersands ('pre_ampersand'). Also return + number of whitespace characters before ampersand of previous line + ('ampersand_sep'). + + Note: Don't do any whitespace formatting on ampersands if next line starts + with an ampersand but remember the original number of spaces + (ampersand_sep). This "special rule" is necessary since ampersands starting + a line can be used to break literals, so changing the number of whitespaces + before the ampersand ending the previous line may lead to invalid syntax or + may change the number of whitespace characters in a string. + """ + pre_ampersand = [] + ampersand_sep = [] + + for pos, line in enumerate(lines): + match = re.search(SOL_STR + r"(&\s*)", line) + if match: + pre_ampersand.append(match.group(1)) + # amount of whitespace before ampersand of previous line: + m = re.search(r"(\s*)&[\s]*(?:!.*)?$", lines[pos - 1]) + if not m: + raise FprettifyParseException( + "Bad continuation line format", filename, line_nr + ) + sep = len(m.group(1)) + + ampersand_sep.append(sep) + else: + pre_ampersand.append("") + if pos > 0: + # use default 1 whitespace character before ampersand + ampersand_sep.append(1) + + lines = [l.strip(" ").strip("&") if not s else l for l, s in zip(lines, is_special)] + return [lines, pre_ampersand, ampersand_sep] + + +def prepend_ampersands(lines, indent, pre_ampersand): + """prepend ampersands and correct indent""" + for pos, line in enumerate(lines): + amp_insert = pre_ampersand[pos] + if amp_insert: + indent[pos] += -1 + lines[pos] = amp_insert + line.lstrip() + + return [lines, indent] + + +def split_reformatted_line( + line_orig, linebreak_pos_orig, ampersand_sep, line, filename, line_nr +): + """ + Infer linebreak positions of formatted line from linebreak positions in + original line and split line. + """ + # shift line break positions from original to reformatted line + pos_new = 0 + pos_old = 0 + linebreak_pos_orig.sort(reverse=True) + linebreak_pos_ftd = [] + while 1: + + if pos_new == len(line) or pos_old == len(line_orig): + break + + if line[pos_new] != line_orig[pos_old]: + raise FprettifyInternalException( + "failed at finding line break position", filename, line_nr + ) + + if linebreak_pos_orig and pos_old > linebreak_pos_orig[-1]: + linebreak_pos_orig.pop() + linebreak_pos_ftd.append(pos_new) + continue + + pos_new += 1 + while pos_new < len(line) and line[pos_new] == " ": + pos_new += 1 + + pos_old += 1 while pos_old < len(line_orig) and line_orig[pos_old] == " ": pos_old += 1 @@ -591,137 +493,390 @@ def split_reformatted_line( return lines_split -def reformat_inplace( - filename, stdout=False, diffonly=False, **kwargs -): # pragma: no cover - """reformat a file in place.""" - if filename == "-": - infile = io.StringIO() - infile.write(sys.stdin.read()) - else: - infile = io.open(filename, "r", encoding="utf-8") +def append_comments(lines, comment_lines, is_special): + """append comments to lines""" + for pos, (line, comment) in enumerate(zip(lines, comment_lines)): + if pos < len(lines) - 1: + has_nl = True # has next line + if not line.strip() and not is_special[pos]: + comment = comment.lstrip() + else: + has_nl = not re.search(EOL_SC, line) + lines[pos] = lines[pos].rstrip(" ") + comment + "\n" * has_nl - newfile = io.StringIO() - reformat_ffile(infile, newfile, orig_filename=filename, **kwargs) + return lines - if diffonly: - infile.seek(0) - newfile.seek(0) - diff_contents = diff(infile.read(), newfile.read(), filename, filename) - sys.stdout.write(diff_contents) - else: - if stdout: - sys.stdout.write(newfile.getvalue()) +def format_comments(lines, comments, strip_comments): + comments_ftd = [] + for line, comment in zip(lines, comments): + has_comment = bool(comment.strip()) + if has_comment: + if strip_comments: + sep = not comment.strip() == line.strip() + else: + line_minus_comment = line.replace(comment, "") + sep = len(line_minus_comment.rstrip("\n")) - len( + line_minus_comment.rstrip() + ) else: - outfile = io.open(filename, "r", encoding="utf-8") - - # write to outfile only if content has changed + sep = 0 - import hashlib + if line.strip(): # empty lines between linebreaks are ignored + comments_ftd.append(" " * sep + comment.strip()) + return comments_ftd - hash_new = hashlib.md5() - hash_new.update(newfile.getvalue().encode("utf-8")) - hash_old = hashlib.md5() - hash_old.update(outfile.read().encode("utf-8")) - outfile.close() +def format_line( + f_line, + whitespace, + whitespace_dict, + linebreak_pos, + ampersand_sep, + scope_parser, + format_decl, + filename, + line_nr, + auto_format=True, +): + """ + format a single Fortran line - imposes white space formatting + and inserts linebreaks. + Takes a logical Fortran line `f_line` as input as well as the positions + of the linebreaks (`linebreak_pos`), and the number of + separating whitespace characters before ampersand (`ampersand_sep`). + `filename` and `line_nr` just for error messages. + The higher `whitespace`, the more white space characters inserted - + whitespace = 0, 1, 2, 3 are currently supported. + whitespace formatting can additionally controlled more fine-grained + via a dictionary of bools (whitespace_dict) + auto formatting can be turned off by setting `auto_format` to False. + """ - if hash_new.digest() != hash_old.digest(): - outfile = io.open(filename, "w", encoding="utf-8") - outfile.write(newfile.getvalue()) + # define whether to put whitespaces around operators: + mapping = { + "comma": 0, # 0: comma, semicolon + "assignments": 1, # 1: assignment operators + "relational": 2, # 2: relational operators + "logical": 3, # 3: logical operators + "plusminus": 4, # 4: arithm. operators plus and minus + "multdiv": 5, # 5: arithm. operators multiply and divide + "print": 6, # 6: print / read statements + "type": 7, # 7: select type components + "intrinsics": 8, # 8: intrinsics + "decl": 9, # 9: declarations + } + if whitespace == 0: + spacey = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + elif whitespace == 1: + spacey = [1, 1, 1, 1, 0, 0, 1, 0, 1, 1] + elif whitespace == 2: + spacey = [1, 1, 1, 1, 1, 0, 1, 0, 1, 1] + elif whitespace == 3: + spacey = [1, 1, 1, 1, 1, 1, 1, 0, 1, 1] + elif whitespace == 4: + spacey = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + else: + raise NotImplementedError("unknown value for whitespace") -def reformat_ffile( - infile, - outfile, - impose_indent=True, - indent_size=3, - strict_indent=False, - impose_whitespace=True, - case_dict={}, - impose_replacements=False, - cstyle=False, - whitespace=2, - whitespace_dict={}, - llength=132, - strip_comments=False, - format_decl=False, - orig_filename=None, - indent_fypp=True, - indent_mod=True, -): - """main method to be invoked for formatting a Fortran file.""" + if whitespace_dict: + # iterate over dictionary and override settings for 'spacey' + for key, value in mapping.items(): + if whitespace_dict[key] == True: + spacey[value] = 1 + elif whitespace_dict[key] == False: + spacey[value] = 0 - # note: whitespace formatting and indentation may require different parsing rules - # (e.g. preprocessor statements may be indented but not whitespace formatted) - # therefore we invoke reformat_ffile independently for: - # 1) whitespace formatting - # 2) indentation + line = f_line + line_orig = line - if not orig_filename: - orig_filename = infile.name + if auto_format: - # 1) whitespace formatting - oldfile = infile - newfile = infile + line = remove_extra_whitespace(line, format_decl) + line = add_whitespace_charwise( + line, spacey, scope_parser, format_decl, filename, line_nr + ) + line = add_whitespace_context(line, spacey) - if impose_whitespace: - _impose_indent = False + lines_out = split_reformatted_line( + line_orig, linebreak_pos, ampersand_sep, line, filename, line_nr + ) + return lines_out - newfile = io.StringIO() - reformat_ffile_combined( - oldfile, - newfile, - _impose_indent, - indent_size, - strict_indent, - impose_whitespace, - case_dict, - impose_replacements, - cstyle, - whitespace, - whitespace_dict, - llength, - strip_comments, - format_decl, - orig_filename, - indent_fypp, - indent_mod, - ) - oldfile = newfile - # 2) indentation - if impose_indent: +def write_line( + outfile, + indent, + lines, + orig_lines, + indent_special, + llength, + use_same_line, + is_omp_conditional, + label, + filename, + line_nr, +): + """Write reformatted line to file""" - _impose_whitespace = False - _impose_replacements = False + for ind, line, orig_line in zip(indent, lines, orig_lines): - newfile = io.StringIO() - reformat_ffile_combined( - oldfile, - newfile, - impose_indent, - indent_size, - strict_indent, - _impose_whitespace, - case_dict, - _impose_replacements, - cstyle, - whitespace, - whitespace_dict, - llength, - strip_comments, - format_decl, - orig_filename, - indent_fypp, - indent_mod, - ) + # get actual line length excluding comment: + line_length = 0 + for line_length, _ in CharFilter(line): + pass + line_length += 1 - outfile.write(newfile.getvalue()) + if indent_special != 1: + ind_use = ind + else: + if use_same_line: + ind_use = 1 + else: + ind_use = 0 + + if CPP_RE.search(line.lstrip()): + ind_use = 0 + + if label: + label_use = label + label = "" # no label for continuation lines + else: + label_use = "" + + if ind_use + line_length <= ( + llength + 1 + ): # llength (default 132) plus 1 newline char + outfile.write( + "!$ " * is_omp_conditional + + label_use + + " " + * ( + ind_use + - 3 * is_omp_conditional + - len(label_use) + + len(line) + - len(line.lstrip(" ")) + ) + + line.lstrip(" ") + ) + elif line_length <= (llength + 1): + outfile.write( + "!$ " * is_omp_conditional + + label_use + + " " + * ( + (llength + 1) + - 3 * is_omp_conditional + - len(label_use) + - len(line.lstrip(" ")) + ) + + line.lstrip(" ") + ) + + log_message( + LINESPLIT_MESSAGE + " (limit: " + str(llength) + ")", + "warning", + filename, + line_nr, + ) + else: + outfile.write(orig_line) + log_message( + LINESPLIT_MESSAGE + " (limit: " + str(llength) + ")", + "warning", + filename, + line_nr, + ) + + +def inspect_file( + infile, indent_size, strict_indent, indent_fypp=False, orig_filename=None +): + """ + Determine indentation by inspecting original Fortran file. + + This is mainly for finding aligned blocks of DO/IF statements. + Also check if it has f77 constructs. + :param infile: open file + :param indent_size: the default indent size + :orig_filename: filename used for messages + :returns: [ target indent sizes for each line, + indent of first line (offset) ] + """ + if not orig_filename: + orig_filename = infile.name + + indents = [] + stream = InputStream( + infile, filter_fypp=not indent_fypp, orig_filename=orig_filename + ) + prev_offset = 0 + first_indent = -1 + has_fypp = False + + while 1: + f_line, _, lines = stream.next_fortran_line() + if not lines: + break + + if FYPP_LINE_RE.search(f_line): + has_fypp = True + + f_line, lines, _ = preprocess_labels(f_line, lines) + + offset = len(lines[0]) - len(lines[0].lstrip(" ")) + if f_line.strip() and first_indent == -1: + first_indent = offset + indents.append(offset - prev_offset) + + # don't impose indentation for blocked do/if constructs: + if IF_RE.search(f_line) or DO_RE.search(f_line): + if prev_offset != offset or strict_indent: + indents[-1] = indent_size + else: + indents[-1] = indent_size + + prev_offset = offset + + return indents, first_indent, has_fypp + + +def replace_relational_ops(line, cstyle): + """ + Replace scalar relational operators in logical expressions + with either Fortran or C-style. Reformats a single line. + .lt. <--> < + .le. <--> <= + .gt. <--> > + .ge. <--> >= + .eq. <--> == + .ne. <--> /= + """ + + new_line = line + + # only act on lines that do contain a relation + if REL_OP_RE.search(line): + # check that relation is not inside quotes, a string, or commented + # (think of underlining a heading with === or things like markup being printed which we do not replace) + pos_prev = -1 + pos = -1 + line_parts = [""] + for pos, char in CharFilter(line): + if pos > pos_prev + 1: # skipped string + line_parts.append(line[pos_prev + 1 : pos].strip()) # append string + line_parts.append("") + + line_parts[-1] += char + + pos_prev = pos + + if pos + 1 < len(line): + line_parts.append(line[pos + 1 :]) + + for pos, part in enumerate(line_parts): + # exclude comments, strings: + if not STR_OPEN_RE.match(part): + # also exclude / if we see a namelist and data statement + if cstyle: + part = re.sub(r"\.LT\.", "< ", part, flags=RE_FLAGS) + part = re.sub(r"\.LE\.", "<= ", part, flags=RE_FLAGS) + part = re.sub(r"\.GT\.", "> ", part, flags=RE_FLAGS) + part = re.sub(r"\.GE\.", ">= ", part, flags=RE_FLAGS) + part = re.sub(r"\.EQ\.", "== ", part, flags=RE_FLAGS) + part = re.sub(r"\.NE\.", "/= ", part, flags=RE_FLAGS) + else: + part = re.sub(r"<=", ".le.", part, flags=RE_FLAGS) + part = re.sub(r"<", ".lt.", part, flags=RE_FLAGS) + part = re.sub(r">=", ".ge.", part, flags=RE_FLAGS) + part = re.sub(r">", ".gt.", part, flags=RE_FLAGS) + part = re.sub(r"==", ".eq.", part, flags=RE_FLAGS) + part = re.sub(r"\/=", ".ne.", part, flags=RE_FLAGS) + + line_parts[pos] = part + + new_line = "".join(line_parts) + + return new_line + + +def replace_keywords(line, case_dict): + """ + format a single Fortran line - change case of keywords + """ + + new_line = line + + # Collect words list + pos_prev = -1 + pos = -1 + line_parts = [""] + for pos, char in CharFilter(line): + if pos > pos_prev + 1: # skipped string + line_parts.append(line[pos_prev + 1 : pos].strip()) # append string + line_parts.append("") + + line_parts[-1] += char + + pos_prev = pos + + if pos + 1 < len(line): + line_parts.append(line[pos + 1 :]) + + line_parts = [ + [a] if STR_OPEN_RE.match(a) else re.split(F90_OPERATORS_RE, a) + for a in line_parts + ] # problem, split "." + line_parts = [b for a in line_parts for b in a] + + ## line_parts = [[a] if STR_OPEN_RE.match(a) else re.split('(\W)',a) + ## for a in line_parts] # problem, split "." + line_parts = [ + [a] if STR_OPEN_RE.match(a) else re.split("([^a-zA-Z0-9_.])", a) + for a in line_parts + ] + line_parts = [b for a in line_parts for b in a] + + swapcase = lambda s, a: s if a == 0 else (s.lower() if a == 1 else s.upper()) + + nbparts = len(line_parts) + for pos, part in enumerate(line_parts): + # exclude comments, strings: + if part.strip() and not STR_OPEN_RE.match(part): + if F90_KEYWORDS_RE.match(part): + part = swapcase(part, case_dict["keywords"]) + elif F90_MODULES_RE.match(part): + part = swapcase(part, case_dict["procedures"]) + elif F90_PROCEDURES_RE.match(part): + ok = False + for pos2 in range(pos + 1, nbparts): + part2 = line_parts[pos2] + if part2.strip() and not ( + part2 == "\n" or STR_OPEN_RE.match(part2) + ): + ok = part2 == "(" + break + if ok: + part = swapcase(part, case_dict["procedures"]) + elif F90_OPERATORS_RE.match(part): + part = swapcase(part, case_dict["operators"]) + elif F90_CONSTANTS_RE.match(part): + part = swapcase(part, case_dict["constants"]) + elif F90_CONSTANTS_TYPES_RE.match(part): + part = swapcase(part, case_dict["constants"]) + elif F90_NUMBER_ALL_REC.match(part): + part = swapcase(part, case_dict["constants"]) + + line_parts[pos] = part + + new_line = "".join(line_parts) + + return new_line -def reformat_ffile_combined( +def reformat_internal( infile, outfile, impose_indent=True, @@ -748,7 +903,7 @@ def reformat_ffile_combined( indent_fypp = False infile.seek(0) - req_indents, first_indent, has_fypp = inspect_ffile_format( + req_indents, first_indent, has_fypp = inspect_file( infile, indent_size, strict_indent, indent_fypp, orig_filename ) infile.seek(0) @@ -828,22 +983,22 @@ def reformat_ffile_combined( else: manual_lines_indent = [] - lines, pre_ampersand, ampersand_sep = remove_pre_ampersands( + lines, pre_ampersand, ampersand_sep = remove_leading_ampersands( lines, is_special, orig_filename, stream.line_nr ) - linebreak_pos = get_linebreak_pos(lines, filter_fypp=not indent_fypp) + linebreak_pos = get_linebreak_position(lines, filter_fypp=not indent_fypp) f_line = f_line.strip(" ") if impose_replacements: - f_line = replace_relational_single_fline(f_line, cstyle) + f_line = replace_relational_ops(f_line, cstyle) if impose_case: - f_line = replace_keywords_single_fline(f_line, case_dict) + f_line = replace_keywords(f_line, case_dict) if impose_whitespace: - lines = format_single_fline( + lines = format_line( f_line, whitespace, whitespace_dict, @@ -888,7 +1043,7 @@ def reformat_ffile_combined( if indent[0] < len(label): indent = [ind + len(label) - indent[0] for ind in indent] - write_formatted_line( + write_line( outfile, indent, lines, @@ -919,222 +1074,90 @@ def reformat_ffile_combined( ) -def format_comments(lines, comments, strip_comments): - comments_ftd = [] - for line, comment in zip(lines, comments): - has_comment = bool(comment.strip()) - if has_comment: - if strip_comments: - sep = not comment.strip() == line.strip() - else: - line_minus_comment = line.replace(comment, "") - sep = len(line_minus_comment.rstrip("\n")) - len( - line_minus_comment.rstrip() - ) - else: - sep = 0 - - if line.strip(): # empty lines between linebreaks are ignored - comments_ftd.append(" " * sep + comment.strip()) - return comments_ftd - - -def pass_defaults_to_next_line(f_line): - """defaults to be transferred from f_line to next line""" - if re.search(r";\s*$", f_line, RE_FLAGS): - # if line ended with semicolon, don't indent next line - do_indent = False - use_same_line = True - else: - do_indent = True - use_same_line = False - - return [do_indent, use_same_line] - - -def remove_trailing_whitespace(lines): - """remove trailing whitespaces from lines""" - lines = [re.sub(r"\s+$", "\n", l, RE_FLAGS) for l in lines] - return lines - - -def prepend_ampersands(lines, indent, pre_ampersand): - """prepend ampersands and correct indent""" - for pos, line in enumerate(lines): - amp_insert = pre_ampersand[pos] - if amp_insert: - indent[pos] += -1 - lines[pos] = amp_insert + line.lstrip() - - return [lines, indent] - - -def append_comments(lines, comment_lines, is_special): - """append comments to lines""" - for pos, (line, comment) in enumerate(zip(lines, comment_lines)): - if pos < len(lines) - 1: - has_nl = True # has next line - if not line.strip() and not is_special[pos]: - comment = comment.lstrip() - else: - has_nl = not re.search(EOL_SC, line) - lines[pos] = lines[pos].rstrip(" ") + comment + "\n" * has_nl - - return lines - - -def get_linebreak_pos(lines, filter_fypp=True): - """extract linebreak positions in Fortran line from lines""" - linebreak_pos = [] - if filter_fypp: - notfortran_re = NOTFORTRAN_LINE_RE - else: - notfortran_re = NOTFORTRAN_FYPP_LINE_RE - - for line in lines: - found = None - for char_pos, _ in CharFilter(line, filter_strings=False): - if re.match(LINEBREAK_STR, line[char_pos:], RE_FLAGS): - found = char_pos - if found: - linebreak_pos.append(found) - elif notfortran_re.search(line.lstrip(" ")): - linebreak_pos.append(0) - - linebreak_pos = [ - sum(linebreak_pos[0 : _ + 1]) - 1 for _ in range(0, len(linebreak_pos)) - ] - - return linebreak_pos - - -def remove_pre_ampersands(lines, is_special, filename, line_nr): - """ - remove and return preceding ampersands ('pre_ampersand'). Also return - number of whitespace characters before ampersand of previous line - ('ampersand_sep'). - - Note: Don't do any whitespace formatting on ampersands if next line starts - with an ampersand but remember the original number of spaces - (ampersand_sep). This "special rule" is necessary since ampersands starting - a line can be used to break literals, so changing the number of whitespaces - before the ampersand ending the previous line may lead to invalid syntax or - may change the number of whitespace characters in a string. - """ - pre_ampersand = [] - ampersand_sep = [] - - for pos, line in enumerate(lines): - match = re.search(SOL_STR + r"(&\s*)", line) - if match: - pre_ampersand.append(match.group(1)) - # amount of whitespace before ampersand of previous line: - m = re.search(r"(\s*)&[\s]*(?:!.*)?$", lines[pos - 1]) - if not m: - raise FprettifyParseException( - "Bad continuation line format", filename, line_nr - ) - sep = len(m.group(1)) - - ampersand_sep.append(sep) - else: - pre_ampersand.append("") - if pos > 0: - # use default 1 whitespace character before ampersand - ampersand_sep.append(1) - - lines = [l.strip(" ").strip("&") if not s else l for l, s in zip(lines, is_special)] - return [lines, pre_ampersand, ampersand_sep] - - -def get_manual_alignment(lines): - """extract manual indents for line continuations from line""" - manual_lines_indent = [len(l) - len(l.lstrip(" ").lstrip("&")) for l in lines] - manual_lines_indent = [ind - manual_lines_indent[0] for ind in manual_lines_indent] - return manual_lines_indent - - -def write_formatted_line( +def reformat( + infile, outfile, - indent, - lines, - orig_lines, - indent_special, - llength, - use_same_line, - is_omp_conditional, - label, - filename, - line_nr, + impose_indent=True, + indent_size=3, + strict_indent=False, + impose_whitespace=True, + case_dict={}, + impose_replacements=False, + cstyle=False, + whitespace=2, + whitespace_dict={}, + llength=132, + strip_comments=False, + format_decl=False, + orig_filename=None, + indent_fypp=True, + indent_mod=True, ): - """Write reformatted line to file""" + """main method to be invoked for formatting a Fortran file.""" - for ind, line, orig_line in zip(indent, lines, orig_lines): + # note: whitespace formatting and indentation may require different parsing rules + # (e.g. preprocessor statements may be indented but not whitespace formatted) + # therefore we invoke reformat_ffile independently for: + # 1) whitespace formatting + # 2) indentation - # get actual line length excluding comment: - line_length = 0 - for line_length, _ in CharFilter(line): - pass - line_length += 1 + if not orig_filename: + orig_filename = infile.name - if indent_special != 1: - ind_use = ind - else: - if use_same_line: - ind_use = 1 - else: - ind_use = 0 + # 1) whitespace formatting + oldfile = infile + newfile = infile - if CPP_RE.search(line.lstrip()): - ind_use = 0 + if impose_whitespace: + _impose_indent = False - if label: - label_use = label - label = "" # no label for continuation lines - else: - label_use = "" + newfile = io.StringIO() + reformat_internal( + oldfile, + newfile, + _impose_indent, + indent_size, + strict_indent, + impose_whitespace, + case_dict, + impose_replacements, + cstyle, + whitespace, + whitespace_dict, + llength, + strip_comments, + format_decl, + orig_filename, + indent_fypp, + indent_mod, + ) + oldfile = newfile - if ind_use + line_length <= ( - llength + 1 - ): # llength (default 132) plus 1 newline char - outfile.write( - "!$ " * is_omp_conditional - + label_use - + " " - * ( - ind_use - - 3 * is_omp_conditional - - len(label_use) - + len(line) - - len(line.lstrip(" ")) - ) - + line.lstrip(" ") - ) - elif line_length <= (llength + 1): - outfile.write( - "!$ " * is_omp_conditional - + label_use - + " " - * ( - (llength + 1) - - 3 * is_omp_conditional - - len(label_use) - - len(line.lstrip(" ")) - ) - + line.lstrip(" ") - ) + # 2) indentation + if impose_indent: - log_message( - LINESPLIT_MESSAGE + " (limit: " + str(llength) + ")", - "warning", - filename, - line_nr, - ) - else: - outfile.write(orig_line) - log_message( - LINESPLIT_MESSAGE + " (limit: " + str(llength) + ")", - "warning", - filename, - line_nr, - ) + _impose_whitespace = False + _impose_replacements = False + + newfile = io.StringIO() + reformat_internal( + oldfile, + newfile, + impose_indent, + indent_size, + strict_indent, + _impose_whitespace, + case_dict, + _impose_replacements, + cstyle, + whitespace, + whitespace_dict, + llength, + strip_comments, + format_decl, + orig_filename, + indent_fypp, + indent_mod, + ) + + outfile.write(newfile.getvalue()) diff --git a/fprettify/parser.py b/fprettify/parser.py index 6b2c497..e3c9eeb 100644 --- a/fprettify/parser.py +++ b/fprettify/parser.py @@ -706,15 +706,7 @@ def get_curr_delim(line, pos): return [what_del_open, what_del_close] -class fline_parser: - def __init__(self): - pass - - def search(self, line): - pass - - -class parser_re(fline_parser): +class Parser: def __init__(self, regex, spec=True): self._re = regex self.spec = spec @@ -726,7 +718,7 @@ def split(self, line): return self._re.split(line) -class plusminus_parser(parser_re): +class PlusMinusParser(Parser): """parser for +/- in addition""" def __init__(self, regex): @@ -763,7 +755,7 @@ def split(self, line): return partsplit_out -class where_parser(parser_re): +class WhereParser(Parser): """parser for where / forall construct""" def search(self, line): @@ -793,62 +785,60 @@ def search(self, line): def build_scope_parser(fypp=True, mod=True): parser = {} parser["new"] = [ - parser_re(IF_RE), - parser_re(DO_RE), - parser_re(SELCASE_RE), - parser_re(SUBR_RE), - parser_re(FCT_RE), - parser_re(INTERFACE_RE), - parser_re(TYPE_RE), - parser_re(ENUM_RE), - parser_re(ASSOCIATE_RE), + Parser(IF_RE), + Parser(DO_RE), + Parser(SELCASE_RE), + Parser(SUBR_RE), + Parser(FCT_RE), + Parser(INTERFACE_RE), + Parser(TYPE_RE), + Parser(ENUM_RE), + Parser(ASSOCIATE_RE), None, - parser_re(BLK_RE), - where_parser(WHERE_RE), - where_parser(FORALL_RE), + Parser(BLK_RE), + WhereParser(WHERE_RE), + WhereParser(FORALL_RE), ] parser["continue"] = [ - parser_re(ELSE_RE), + Parser(ELSE_RE), None, - parser_re(CASE_RE), - parser_re(CONTAINS_RE), - parser_re(CONTAINS_RE), + Parser(CASE_RE), + Parser(CONTAINS_RE), + Parser(CONTAINS_RE), None, - parser_re(CONTAINS_RE), + Parser(CONTAINS_RE), None, None, None, None, - parser_re(ELSEWHERE_RE), + Parser(ELSEWHERE_RE), None, ] parser["end"] = [ - parser_re(ENDIF_RE), - parser_re(ENDDO_RE), - parser_re(ENDSEL_RE), - parser_re(ENDSUBR_RE), - parser_re(ENDFCT_RE), - parser_re(ENDINTERFACE_RE), - parser_re(ENDTYPE_RE), - parser_re(ENDENUM_RE), - parser_re(ENDASSOCIATE_RE), - parser_re(ENDANY_RE, spec=False), - parser_re(ENDBLK_RE), - parser_re(ENDWHERE_RE), - parser_re(ENDFORALL_RE), + Parser(ENDIF_RE), + Parser(ENDDO_RE), + Parser(ENDSEL_RE), + Parser(ENDSUBR_RE), + Parser(ENDFCT_RE), + Parser(ENDINTERFACE_RE), + Parser(ENDTYPE_RE), + Parser(ENDENUM_RE), + Parser(ENDASSOCIATE_RE), + Parser(ENDANY_RE, spec=False), + Parser(ENDBLK_RE), + Parser(ENDWHERE_RE), + Parser(ENDFORALL_RE), ] if mod: - parser["new"].extend( - [parser_re(MOD_RE), parser_re(SMOD_RE), parser_re(PROG_RE)] - ) + parser["new"].extend([Parser(MOD_RE), Parser(SMOD_RE), Parser(PROG_RE)]) parser["continue"].extend( - [parser_re(CONTAINS_RE), parser_re(CONTAINS_RE), parser_re(CONTAINS_RE)] + [Parser(CONTAINS_RE), Parser(CONTAINS_RE), Parser(CONTAINS_RE)] ) parser["end"].extend( - [parser_re(ENDMOD_RE), parser_re(ENDSMOD_RE), parser_re(ENDPROG_RE)] + [Parser(ENDMOD_RE), Parser(ENDSMOD_RE), Parser(ENDPROG_RE)] ) if fypp: @@ -860,25 +850,25 @@ def build_scope_parser(fypp=True, mod=True): PREPRO_NEW_SCOPE = [ - parser_re(FYPP_DEF_RE), - parser_re(FYPP_IF_RE), - parser_re(FYPP_FOR_RE), - parser_re(FYPP_BLOCK_RE), - parser_re(FYPP_CALL_RE), - parser_re(FYPP_MUTE_RE), + Parser(FYPP_DEF_RE), + Parser(FYPP_IF_RE), + Parser(FYPP_FOR_RE), + Parser(FYPP_BLOCK_RE), + Parser(FYPP_CALL_RE), + Parser(FYPP_MUTE_RE), ] -PREPRO_CONTINUE_SCOPE = [None, parser_re(FYPP_ELIF_ELSE_RE), None, None, None, None] +PREPRO_CONTINUE_SCOPE = [None, Parser(FYPP_ELIF_ELSE_RE), None, None, None, None] PREPRO_END_SCOPE = [ - parser_re(FYPP_ENDDEF_RE), - parser_re(FYPP_ENDIF_RE), - parser_re(FYPP_ENDFOR_RE), - parser_re(FYPP_ENDBLOCK_RE), - parser_re(FYPP_ENDCALL_RE), - parser_re(FYPP_ENDMUTE_RE), + Parser(FYPP_ENDDEF_RE), + Parser(FYPP_ENDIF_RE), + Parser(FYPP_ENDFOR_RE), + Parser(FYPP_ENDBLOCK_RE), + Parser(FYPP_ENDCALL_RE), + Parser(FYPP_ENDMUTE_RE), ] # two-sided operators -LR_OPS_RE = [REL_OP_RE, LOG_OP_RE, plusminus_parser(PLUSMINUS_RE), MULTDIV_RE, PRINT_RE] +LR_OPS_RE = [REL_OP_RE, LOG_OP_RE, PlusMinusParser(PLUSMINUS_RE), MULTDIV_RE, PRINT_RE] USE_RE = re.compile( SOL_STR + "USE(\s+|(,.+?)?::\s*)\w+?((,.+?=>.+?)+|,\s*only\s*:.+?)?$" + EOL_STR, diff --git a/fprettify/preprocessor.py b/fprettify/preprocessor.py deleted file mode 100644 index 3fd5b34..0000000 --- a/fprettify/preprocessor.py +++ /dev/null @@ -1,73 +0,0 @@ -from fprettify.parser import ( - EMPTY_RE, - FYPP_LINE_RE, - OMP_COND_RE, - OMP_DIR_RE, - STATEMENT_LABEL_RE, -) - - -def preprocess_omp(f_line, lines): - """convert omp conditional to normal fortran""" - - is_omp_conditional = bool(OMP_COND_RE.search(f_line)) - if is_omp_conditional: - f_line = OMP_COND_RE.sub(" ", f_line, count=1) - lines = [OMP_COND_RE.sub(" ", l, count=1) for l in lines] - - return [f_line, lines, is_omp_conditional] - - -def preprocess_labels(f_line, lines): - """remove statement labels""" - - match = STATEMENT_LABEL_RE.search(f_line) - if match: - label = match.group(1) - else: - label = "" - - if label: - f_line = STATEMENT_LABEL_RE.sub(len(label) * " ", f_line, count=1) - lines[0] = STATEMENT_LABEL_RE.sub(len(label) * " ", lines[0], count=1) - - return [f_line, lines, label] - - -def preprocess_line(f_line, lines, comments, filename, line_nr, indent_fypp): - """preprocess lines: identification and formatting of special cases""" - is_blank = False - prev_indent = False - do_format = False - - # is_special: special directives that should not be treated as Fortran - # currently supported: fypp preprocessor directives or comments for FORD documentation - is_special = [False] * len(lines) - - for pos, line in enumerate(lines): - line_strip = line.lstrip() - if indent_fypp: - is_special[pos] = line_strip.startswith("!!") or ( - FYPP_LINE_RE.search(line_strip) if pos > 0 else False - ) - else: - is_special[pos] = FYPP_LINE_RE.search(line_strip) or line_strip.startswith( - "!!" - ) - - # if first line is special, all lines should be special - if is_special[0]: - is_special = [True] * len(lines) - - if EMPTY_RE.search(f_line): # empty lines including comment lines - if any(comments): - if lines[0].startswith(" ") and not OMP_DIR_RE.search(lines[0]): - # indent comment lines only if they were not indented before. - prev_indent = True - else: - is_blank = True - lines = [l.strip(" ") if not is_special[n] else l for n, l in enumerate(lines)] - else: - do_format = True - - return [lines, do_format, prev_indent, is_blank, is_special] diff --git a/fprettify/tests/__init__.py b/fprettify/tests/__init__.py index 5c5017a..b3abbd4 100644 --- a/fprettify/tests/__init__.py +++ b/fprettify/tests/__init__.py @@ -32,7 +32,7 @@ from fprettify.constants import FORTRAN_EXTENSIONS from fprettify.exceptions import FprettifyInternalException, FprettifyParseException -from fprettify.formatter import reformat_ffile +from fprettify.formatter import reformat from fprettify.utils import log_exception, set_logger sys.stderr = io.TextIOWrapper( @@ -975,7 +975,7 @@ def test_result(path, info): outstring = io.StringIO() try: - reformat_ffile(infile, outstring) + reformat(infile, outstring) m = hashlib.sha256() m.update(outstring.getvalue().encode("utf-8")) diff --git a/setup.cfg b/setup.cfg index ba5454c..b195772 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ install_requires = [options.entry_points] console_scripts = - fprettify = fprettify.__init__:run + fprettify = fprettify.cli:run [options.extras_require] dev =