From c50d925f99a8c4d041487099374458f600161e5e Mon Sep 17 00:00:00 2001
From: Boryana Goncharenko
 <3010723+boryanagoncharenko@users.noreply.github.com>
Date: Tue, 27 Feb 2024 19:13:18 +0200
Subject: [PATCH] Allow -= and >> operators to be used independently when
 merging grammars #5176

---
 grammars/level15-Additions.lark |   2 +-
 grammars/level17-Additions.lark |   2 +-
 grammars/level18-Additions.lark |   3 +-
 grammars/level2-Additions.lark  |   4 +-
 grammars/level3-Additions.lark  |   2 +-
 grammars/level5-Additions.lark  |   2 +-
 grammars/level8-Additions.lark  |   5 +-
 grammars/level9-Additions.lark  |   2 +-
 hedy.py                         |  84 +++++++++++++-------------
 tests/test_grammar_merging.py   | 102 ++++++++++++++++++++++++++++++++
 10 files changed, 156 insertions(+), 52 deletions(-)
 create mode 100644 tests/test_grammar_merging.py

diff --git a/grammars/level15-Additions.lark b/grammars/level15-Additions.lark
index dd714af4556..930c01ca896 100644
--- a/grammars/level15-Additions.lark
+++ b/grammars/level15-Additions.lark
@@ -1,4 +1,4 @@
-command:+= while_loop | ifpressed-= error_ifpressed_missing_else > ifelse | ifs
+command: += while_loop | ifpressed -= error_ifpressed_missing_else >> error_ifelse | ifs
 
 // new : while loop
 while_loop: _WHILE (_conditions) _EOL (_SPACE command) (_EOL _SPACE command)* _EOL? _END_BLOCK
diff --git a/grammars/level17-Additions.lark b/grammars/level17-Additions.lark
index 649bae25547..61f0f5c4cf4 100644
--- a/grammars/level17-Additions.lark
+++ b/grammars/level17-Additions.lark
@@ -1,6 +1,6 @@
 // We add the : at the end of if and else and elif and for/while
 
-command:+= ifpressed (ifpressed_elifs)* ifpressed_elses? | ifs (elifs)* elses? -= ifs elses?
+command: += ifpressed (ifpressed_elifs)* ifpressed_elses? | ifs (elifs)* elses? -= ifs elses?
 
 // These will be handled by the preprocessor step in the merger
 // It will find the appropiate function for this anotation and modify it accordingly
diff --git a/grammars/level18-Additions.lark b/grammars/level18-Additions.lark
index 66ced43ed1d..58c581064b9 100644
--- a/grammars/level18-Additions.lark
+++ b/grammars/level18-Additions.lark
@@ -1,6 +1,7 @@
 // adds round brackets in print() and range(), and changes ask to input
 
-command:+= input | input_empty_brackets | print_empty_brackets -= ask | error_print_no_quotes > assign | call
+command: += input | input_empty_brackets | print_empty_brackets -= ask | error_print_no_quotes >> assign | call
+_if_less_command: -= ask | error_print_no_quotes
 
 print: _PRINT (_LEFT_BRACKET (quoted_text | list_access | print_expression) (_COMMA (quoted_text | list_access | print_expression))* _RIGHT_BRACKET)?
 print_empty_brackets: _PRINT _LEFT_BRACKET _RIGHT_BRACKET
diff --git a/grammars/level2-Additions.lark b/grammars/level2-Additions.lark
index d3f8685296e..88f074be8d2 100644
--- a/grammars/level2-Additions.lark
+++ b/grammars/level2-Additions.lark
@@ -1,5 +1,5 @@
-//add the rules after += remove those after -= and the ones after > should be at the end
-command:+= error_ask_dep_2 | error_ask_missing_variable | assign | error_echo_dep_2 | error_non_decimal | sleep -= echo > error_invalid
+//add the rules after += remove those after -= and the ones after >> should be at the end
+command: += error_ask_dep_2 | error_ask_missing_variable | assign | error_echo_dep_2 | error_non_decimal | sleep -= echo >> error_invalid
 print: _PRINT (_print_argument)?
 _print_argument: (_SPACE | textwithoutspaces)*
 ask: var _IS _ASK ((_SPACE | text)*)?
diff --git a/grammars/level3-Additions.lark b/grammars/level3-Additions.lark
index 68b89579c1b..2533bef1b3b 100644
--- a/grammars/level3-Additions.lark
+++ b/grammars/level3-Additions.lark
@@ -1,4 +1,4 @@
-command:+= assign_list | add | remove | error_add_missing_to | error_remove_missing_from > error_invalid
+command:+= assign_list | add | remove | error_add_missing_to | error_remove_missing_from >> error_invalid
 _print_argument: (_SPACE | (list_access textwithoutspaces?) | textwithoutspaces)*
 ask: var _IS _ASK ((_SPACE | text_ask)*)?
 
diff --git a/grammars/level5-Additions.lark b/grammars/level5-Additions.lark
index f1d7012533c..3052af690ea 100644
--- a/grammars/level5-Additions.lark
+++ b/grammars/level5-Additions.lark
@@ -4,7 +4,7 @@ _empty_program: (_EOL | _SPACE)*
 _non_empty_program: _EOL* (command | error_invalid) _SPACE* (_EOL+ command _SPACE*)* _EOL* //lines may end on spaces and might be separated by many newlines
 
 //placing assign after print means print is will print 'is' and print is Felienne will print 'is Felienne'
-command:+= assign_button | ifpressed_else | error_ifpressed_missing_else | ifelse | ifs -= error_invalid > assign
+command: += assign_button | ifpressed_else | error_ifpressed_missing_else | ifelse | ifs -= error_invalid >> assign
 _if_less_command: print | ask | play | turtle | assign_button | assign_list | add | remove | sleep | error_print_no_quotes | assign
 
 // error_invalid is moved from to command to program, so that command rules have priority over error_invalid
diff --git a/grammars/level8-Additions.lark b/grammars/level8-Additions.lark
index abf65c1d1df..460959fbdb4 100644
--- a/grammars/level8-Additions.lark
+++ b/grammars/level8-Additions.lark
@@ -1,4 +1,4 @@
-command:+= error_repeat_dep_8 | error_ifelse | error_ifpressed_missing_else | ifpressed ifpressed_elses? | ifs elses?-= error_invalid_space | ifelse
+command: += error_repeat_dep_8 | error_ifelse | error_ifpressed_missing_else | ifpressed ifpressed_elses? | ifs elses? -= error_invalid_space | ifelse
 
 repeat.1: _REPEAT (INT | var_access) _TIMES _SPACE? _EOL (_SPACE command) (_EOL _SPACE command)* _EOL? _END_BLOCK
 error_repeat_dep_8: _REPEAT (INT | var_access) _TIMES _SPACE command
@@ -18,7 +18,6 @@ ifpressed_elses: elses
 //'if' cannot be used in Python, hence the name of the rule is 'ifs'
 ifs: _IF condition _EOL (_SPACE command) (_EOL _SPACE command)* _EOL? _END_BLOCK
 
-
 //old 'flat' ifelse:
-error_ifelse : ifelse
+error_ifelse : _if_clause (_SPACE+ _EOL* | _SPACE* _EOL+) _ELSE (_SPACE+ _EOL* | _SPACE* _EOL+) _if_less_command
 
diff --git a/grammars/level9-Additions.lark b/grammars/level9-Additions.lark
index 799a125d974..15a81eb92ed 100644
--- a/grammars/level9-Additions.lark
+++ b/grammars/level9-Additions.lark
@@ -1,3 +1,3 @@
 // only remove repeat error commands
 
-command:+= repeat | error_repeat_no_command-=repeat | error_repeat_no_command | error_repeat_no_print | error_repeat_no_times
+command: -= error_repeat_no_print | error_repeat_no_times
diff --git a/hedy.py b/hedy.py
index e6a57511761..3876e3c3136 100644
--- a/hedy.py
+++ b/hedy.py
@@ -2729,7 +2729,8 @@ def merge_grammars(grammar_text_1, grammar_text_2):
     # rules that are new in the second file are added (remaining_rules_grammar_2)
     merged_grammar = []
 
-    deletables = []   # this list collects rules we no longer need,
+    deletables = []
+    # this list collects rules we no longer need,
     # they will be removed when we encounter them
 
     rules_grammar_1 = grammar_text_1.split('\n')
@@ -2763,8 +2764,7 @@ def merge_grammars(grammar_text_1, grammar_text_2):
                 if definition_1.strip() == definition_2.strip():
                     warn_message = f"The rule {name_1} is duplicated: {definition_1} and {definition_2}. Please check!"
                     warnings.warn(warn_message)
-                # Used to compute the rules that use the merge operators in the grammar
-                # namely +=, -= and >
+                # Used to compute the rules that use the merge operators in the grammar, namely +=, -= and >>
                 new_rule, new_deletables = merge_rules_operator(definition_1, definition_2, name_1, line_2_processed)
                 if new_deletables:
                     deletables += new_deletables
@@ -2789,53 +2789,55 @@ def merge_grammars(grammar_text_1, grammar_text_2):
     return '\n'.join(rules_to_keep)
 
 
-def merge_rules_operator(prev_definition, new_definition, name, complete_line):
-    # Check if the rule is adding or substracting new rules
-    has_add_op = new_definition.startswith('+=')
-    has_remove_op = has_add_op and '-=' in new_definition
-    has_last_op = has_add_op and '>' in new_definition
-    deletables = None
-    if has_remove_op:
-        # Get the rules we need to substract
-        part_list = new_definition.split('-=')
-        add_list, commands_after_minus = (part_list[0], part_list[1]) if has_remove_op else (part_list[0], '')
-        add_list = add_list[3:]
-
-        # Get the rules that need to be last
-        split_on_greater_than = commands_after_minus.split('>')
-        commands_to_be_removed, last_list = (
-            split_on_greater_than[0], split_on_greater_than[1]) if has_last_op else (split_on_greater_than[0], '')
-        commands_after_minus = commands_to_be_removed + '|' + last_list
-        result_cmd_list = get_remaining_rules(prev_definition, commands_after_minus)
-        deletables = commands_to_be_removed.strip().split('|')
-    elif has_add_op:
-        # Get the rules that need to be last
-        part_list = new_definition.split('>')
-        add_list, commands_after_minus = (part_list[0], part_list[1]) if has_last_op else (part_list[0], '')
-        add_list = add_list[3:]
-        last_list = commands_after_minus
-        result_cmd_list = get_remaining_rules(prev_definition, commands_after_minus)
-    else:
-        result_cmd_list = prev_definition
+ADD_GRAMMAR_MERGE_OP = '+='
+REMOVE_GRAMMAR_MERGE_OP = '-='
+LAST_GRAMMAR_MERGE_OP = '>>'
+GRAMMAR_MERGE_OPERATORS = [ADD_GRAMMAR_MERGE_OP, REMOVE_GRAMMAR_MERGE_OP, LAST_GRAMMAR_MERGE_OP]
 
-    if has_last_op:
-        new_rule = f"{name}: {result_cmd_list} | {add_list} | {last_list}"
-    elif has_add_op:
-        new_rule = f"{name}: {result_cmd_list} | {add_list}"
-    else:
-        new_rule = complete_line
 
-    return new_rule, deletables
+def merge_rules_operator(prev_definition, new_definition, name, complete_line):
+    op_to_arg = get_operator_to_argument(new_definition)
+
+    add_arg = op_to_arg.get(ADD_GRAMMAR_MERGE_OP, '')
+    remove_arg = op_to_arg.get(REMOVE_GRAMMAR_MERGE_OP, '')
+    last_arg = op_to_arg.get(LAST_GRAMMAR_MERGE_OP, '')
+    remaining_commands = get_remaining_rules(prev_definition, remove_arg, last_arg)
+    ordered_commands = split_rule(remaining_commands, add_arg, last_arg)
+
+    new_rule = f"{name}: {' | '.join(ordered_commands)}" if bool(op_to_arg) else complete_line
+    deletable = split_rule(remove_arg)
+    return new_rule, deletable
+
+
+def get_operator_to_argument(definition):
+    # Creates a map of all used operators and their respective arguments e.g. {'+=': 'print | play', '>>': 'echo'}
+    operator_to_index = [(op, definition.find(op)) for op in GRAMMAR_MERGE_OPERATORS if op in definition]
+    result = {}
+    for i, (op, index) in enumerate(operator_to_index):
+        start_index = index + len(op)
+        if i + 1 < len(operator_to_index):
+            _, next_index = operator_to_index[i + 1]
+            result[op] = definition[start_index:next_index].strip()
+        else:
+            result[op] = definition[start_index:].strip()
+    return result
 
 
-def get_remaining_rules(orig_def, sub_def):
-    original_commands = [command.strip() for command in orig_def.split('|')]
-    commands_after_minus = [command.strip() for command in sub_def.split('|')]
+def get_remaining_rules(orig_def, *sub_def):
+    original_commands = split_rule(orig_def)
+    commands_after_minus = split_rule(*sub_def)
+    misses = [c for c in commands_after_minus if c not in original_commands]
+    if misses:
+        raise Exception(f"Command(s) {'|'.join(misses)} do not exist in the previous definition")
     remaining_commands = [cmd for cmd in original_commands if cmd not in commands_after_minus]
     remaining_commands = ' | '.join(remaining_commands)  # turn the result list into a string
     return remaining_commands
 
 
+def split_rule(*rules):
+    return [c.strip() for rule in rules for c in rule.split('|') if c.strip() != '']
+
+
 # this is only a couple of MB in total, safe to cache
 @cache
 def create_grammar(level, lang, skip_faulty):
diff --git a/tests/test_grammar_merging.py b/tests/test_grammar_merging.py
new file mode 100644
index 00000000000..4e1fa46f504
--- /dev/null
+++ b/tests/test_grammar_merging.py
@@ -0,0 +1,102 @@
+import unittest
+from hedy import merge_rules_operator
+
+
+class TestGrammarMerging(unittest.TestCase):
+
+    def test_rule_merging_without_operators(self):
+        prev_definition = ' _PRINT (text)?'
+        new_definition = ' _PRINT (_print_argument)?'
+        complete_line = f'print {new_definition}'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'print', complete_line)
+
+        self.assertEqual(complete_line, result)
+        self.assertEqual([], deletable)
+
+    def test_rule_merging_with_greater_than_sign(self):
+        prev_definition = ' /([^\\n#ـ])([^\\n#]*)/ -> text //comment'
+        new_definition = ' /([^\\n#]+)/ -> text'
+        complete_line = 'text: /([^\\n#]+)/ -> text'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'text', complete_line)
+
+        self.assertEqual(complete_line, result)
+        self.assertEqual([], deletable)
+
+    def test_rule_merging_add(self):
+        prev_definition = 'print'
+        new_definition = '+= assign | sleep'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+        self.assertEqual('command: print | assign | sleep', result)
+        self.assertEqual([], deletable)
+
+    def test_rule_merging_add_and_remove(self):
+        prev_definition = 'print | play | test '
+        new_definition = '+= assign | sleep -= play | print'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+        self.assertEqual('command: test | assign | sleep', result)
+        self.assertEqual(['play', 'print'], deletable)
+
+    def test_rule_merging_add_and_last(self):
+        prev_definition = 'print | play | test '
+        new_definition = '+= assign | sleep >> play | print'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+        self.assertEqual('command: test | assign | sleep | play | print', result)
+        self.assertEqual([], deletable)
+
+    def test_rule_merging_add_remove_and_last(self):
+        prev_definition = ' print | error | echo | test '
+        new_definition = '+= assign | sleep -= echo | print >> test | error'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', new_definition)
+
+        self.assertEqual('command: assign | sleep | test | error', result)
+        self.assertEqual(['echo', 'print'], deletable)
+
+    def test_rule_merging_remove(self):
+        prev_definition = 'print | test | assign'
+        new_definition = '-= assign | print'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+        self.assertEqual('command: test', result)
+        self.assertEqual(['assign', 'print'], deletable)
+
+    def test_rule_merging_remove_non_existent_gives_error(self):
+        prev_definition = 'print | test | assign'
+        new_definition = '-= prin'
+
+        with self.assertRaises(Exception):
+            merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+    def test_rule_merging_remove_and_last(self):
+        prev_definition = 'play | print | test | assign'
+        new_definition = '-= assign | print >> play'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+        self.assertEqual('command: test | play', result)
+        self.assertEqual(['assign', 'print'], deletable)
+
+    def test_rule_merging_last(self):
+        prev_definition = 'print | test | assign | play'
+        new_definition = '>> print | play'
+
+        result, deletable = merge_rules_operator(prev_definition, new_definition, 'command', '')
+
+        self.assertEqual('command: test | assign | print | play', result)
+        self.assertEqual([], deletable)
+
+    def test_rule_merging_last_non_existent_gives_error(self):
+        prev_definition = 'print | test | assign'
+        new_definition = '-= prin'
+
+        with self.assertRaises(Exception):
+            merge_rules_operator(prev_definition, new_definition, 'command', '')