reasoning-gym/reasoning_gym/code/contrib/bfit/Compiler/FunctionCompiler.py
Andreas Koepf ebb88e6c6a lint
2025-01-30 22:55:04 +01:00

1247 lines
52 KiB
Python

from collections import namedtuple
from functools import reduce
from .Exceptions import BFSemanticError, BFSyntaxError
from .Functions import check_function_exists, get_function_object
from .General import (
get_literal_token_value,
get_move_to_return_value_cell_code,
get_print_string_code,
get_variable_dimensions_from_token,
get_variable_from_ID_token,
is_token_literal,
process_switch_cases,
)
from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array
from .Node import (
NodeArrayAssignment,
NodeArrayGetElement,
NodeArraySetElement,
NodeFunctionCall,
NodeTernary,
NodeToken,
NodeUnaryPostfix,
NodeUnaryPrefix,
)
from .Parser import Parser
from .Token import Token
"""
This file implements the FunctionCompiler object
This is where we actually compile the code - statements, assignments, calculations, etc
The syntax of the language is defined here as compilation rules
A function is position-dependent - it needs to know where on the tape it runs
So that it can access global variables, which are at the beginning of the stack, correctly
Because of that, the function's code is dependent on when/where we call it
The idea is that we compile the function on demand - every time it is called
And every time we compile it - we pass the current stack pointer to it
This is implemented in the get_code() function
The FunctionCompiler object holds tokens that correspond to the function so that we can compile it on demand
"""
class FunctionCompiler:
def __init__(self, name, tokens):
    """
    Hold the function's token stream so it can be (re)compiled on demand.

    :param name: the function's name (string), matched against the ID token in the definition
    :param tokens: the token list covering this function's definition
    """
    self.name = name
    self.tokens = tokens
    self.parser = Parser(self.tokens)
    self.ids_map_list = list()
    self.type = None  # return type token; set by process_function_definition
    self.parameters = None  # list of parameter variables; set by process_function_definition
    self.process_function_definition()  # sets type and parameters
    self.return_value_cell = None  # will be set on every call to this function
"""
ids_map_list is a list of named tuples. Each tuple represents a scope, and holds 2 items:
1. an index of the next available cell. (if we want to insert a new ID to the ids_map_list, it will be in that index)
2. a dictionary that maps an ID (string) to an index - the cell where we hold that variable
We use this list as a stack:
when entering a scope, we insert a (available_cell, dictionary) to the BEGINNING of the list
when exiting a scope, we pop the last inserted tuple (the one at the BEGINNING of the list)
When declaring a variable in the current scope, we add it to the dictionary at the beginning of the list,
and increase the 'next_available_cell' by 1
When retrieving a variable, we go through the list and return the first occurrence that matches the ID
"""
def process_function_definition(self):
    """
    Consume the function header: return-type token, name (ID), and the
    parameter declaration list. Sets self.type and self.parameters.
    Afterwards the parser points at the LBRACE that opens the function body.
    """
    return_type_token = self.parser.current_token()
    self.parser.advance_token()  # move past the return type
    declared_name = self.parser.current_token().data
    assert declared_name == self.name
    self.parser.advance_token()  # move past the function name (ID)
    declared_parameters = self.get_function_parameters_declaration()
    # parser now points to LBRACE = beginning of function scope
    self.type = return_type_token
    self.parameters = declared_parameters
def get_code(self, current_stack_pointer):
    """
    Compile this function's body for a specific tape position and return the BF code.

    Tape layout:
        current_stack_pointer -------
                                    |
                                    v
        global1 global2 unknown1 unknown2 my_return_value param1 param2 local1 local2

    current_stack_pointer is the current next available cell,
    which is the value of the caller's current_stack_pointer plus this function's parameters.
    We create an ids map for global variables and make room for the return_value cell.

    :param current_stack_pointer: the cell index where this call's frame starts
    :return: BF code string for this function, compiled for that position
    """
    self.insert_global_variables_to_function_scope()
    # self.current_stack_pointer is now equal to the size of the global variables plus 1 (next_available_cell)
    # new stack pointer should be at least that size
    assert self.current_stack_pointer() <= current_stack_pointer
    self.return_value_cell = current_stack_pointer
    self.set_stack_pointer(
        current_stack_pointer + 1
    )  # make room for return_value cell. next available cell is the next one after it.
    function_code = self.compile_function_scope(self.parameters)
    self.remove_ids_map()  # Global variables
    return function_code
# =================
# helper functions
# =================
def insert_global_variables_to_function_scope(self):
    """Open a new scope and register every global variable in it, so globals resolve inside this function."""
    self.add_ids_map()
    for global_variable in get_global_variables():
        self.insert_to_ids_map(global_variable)
def get_array_index_expression(self):
    """
    the idea - address the multi-dimensional array as a one-dimensional array
    calculate the appropriate index in the one-dimensional array
    by multiplying the index in each dimension by its size (i.e the multiplication of all sizes of the following dimensions)
    and then using the NodeArrayGetElement/NodeArraySetElement class which gets an element in a one-dimensional array
    in order to do that, we need to create our own sub-tree of multiplications,
    and pass it as the "index expression"
    e.g if the array is: arr[10][5][2] and we want to get arr[4][3][1]
    then we want to calculate index = (4*(5*2) + 3*(2) + 1)
    """
    ID_token = self.parser.current_token()
    self.parser.advance_token(2)  # skip ID, LBRACK
    first_index_expression = index_expression = self.expression()  # first dimension
    self.parser.check_current_token_is(Token.RBRACK)
    self.parser.advance_token()  # skip RBRACK
    # now handle the next dimensions (if multi-dimensional array)
    dimensions = get_variable_dimensions_from_token(self.ids_map_list, ID_token)
    if len(dimensions) > 1:
        # synthetic operator tokens reuse the ID token's position for error reporting
        multiply_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="*")
        add_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="+")
        # multiply by next dimensions sizes
        multiply_amount = reduce(lambda x, y: x * y, dimensions[1:])  # size of the following dimensions
        node_token_multiply_amount = NodeToken(
            self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))
        )
        index_expression = NodeToken(
            self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount
        )
        # handle next dimensions
        dimension = 1
        while dimension < len(dimensions):
            if self.parser.current_token().type != Token.LBRACK:  # too few indexes given...
                if dimension == 1:
                    return first_index_expression  # allow use of only one dimension for multi-dimensional array
                raise BFSemanticError(
                    "%s is a %s-dimensional array, but only %s dimension(s) given as index"
                    % (str(ID_token), len(dimensions), dimension)
                )
            self.parser.check_current_token_is(Token.LBRACK)
            self.parser.advance_token()  # skip LBRACK
            exp = self.expression()
            self.parser.check_current_token_is(Token.RBRACK)
            self.parser.advance_token()  # skip RBRACK
            # current_dimension_index *= size_of_following_dimensions
            if dimension + 1 < len(dimensions):  # not last dimension - need to multiply and add
                multiply_amount = reduce(
                    lambda x, y: x * y, dimensions[dimension + 1 :]
                )  # size of the following dimensions
                node_token_multiply_amount = NodeToken(
                    self.ids_map_list,
                    token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)),
                )
                multiply_node = NodeToken(
                    self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount
                )
                # prev_dimensions_index += current_dimension_index
                index_expression = NodeToken(
                    self.ids_map_list, token=add_token, left=index_expression, right=multiply_node
                )
            else:  # last dimension - no need to multiply, just add
                index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp)
            dimension += 1
    if self.parser.current_token().type == Token.LBRACK:  # too many indexes given...
        raise BFSemanticError(
            "%s is a %s-dimensional array. Unexpected %s"
            % (str(ID_token), len(dimensions), self.parser.current_token())
        )
    return index_expression
def get_token_after_array_access(self, offset=0):
    # in case we have: "ID[a][b][c]...[z] next_token", return "next_token"
    # note: this only peeks at token indexes - it does not advance the parser
    idx = self.parser.current_token_index + offset
    self.parser.check_next_tokens_are([Token.ID, Token.LBRACK], starting_index=idx - 1)
    idx += 1  # point to LBRACK
    while self.parser.token_at_index(idx).type == Token.LBRACK:
        idx = self.parser.find_matching(idx)  # point to RBRACK
        idx += 1  # advance to one after the RBRACK
    return self.parser.token_at_index(idx)
def compile_array_assignment(self, token_id):
    """
    Parse an array initialization/assignment right-hand side and build its node.

    Accepted forms:
        int id[a][b][c]... = {1, 2, 3, ...};
        int id[a][b][c]... = "\1\2\3...";
        int id[a][b][c]... = {{1, 2}, {3, 4}, ...};
        id = {1, 2, 3, ...};          (array re-assignment)

    :param token_id: the ID token naming the array being assigned
    :return: NodeArrayAssignment built from the literal initialization list
    """
    self.parser.check_current_token_is(Token.ASSIGN)
    if self.parser.current_token().data != "=":
        # only plain '=' is legal here, not compound assignments like '+='
        raise BFSyntaxError("Unexpected %s when assigning array. Expected ASSIGN (=)" % self.parser.current_token())
    initializer = self.parser.next_token()
    if initializer.type not in (Token.LBRACE, Token.STRING):
        raise BFSyntaxError("Expected LBRACE or STRING at '%s'" % self.parser.next_token())
    self.parser.advance_token()  # skip to LBRACE or STRING
    literal_tokens_list = self.parser.compile_array_initialization_list()
    return NodeArrayAssignment(self.ids_map_list, token_id, literal_tokens_list)
def compile_variable_declaration(self):
    """
    Compile one declaration statement starting at the INT token.
    Handles: "int x;", "int x = expr;", array definition "int a[2][3];",
    and array definition with initialization "int a[2][3] = {...};".
    Cell allocation happens when the scope is entered, so bare declarations
    generate no BF code.
    """
    self.parser.check_next_token_is(Token.ID)
    self.parser.advance_token()  # skip "INT" (now points to ID)
    assert self.parser.current_token().type == Token.ID
    if self.parser.next_token().type == Token.SEMICOLON:  # INT ID SEMICOLON
        self.parser.advance_token(2)  # skip ID SEMICOLON
        return (
            ""  # no code is generated here. code was generated for defining this variable when we entered the scope
        )
    elif (
        self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "="
    ):  # INT ID = EXPRESSION SEMICOLON
        return self.compile_expression_as_statement()  # compile_expression_as_statement skips the SEMICOLON
    elif (
        self.parser.next_token().type == Token.LBRACK
    ):  # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
        # array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};)
        token_id = self.parser.current_token()
        self.parser.advance_token()  # skip ID
        while self.parser.current_token().type == Token.LBRACK:  # loop to skip to after last RBRACK ]
            self.parser.check_current_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK])
            self.parser.advance_token(3)  # skip LBRACK, NUM, RBRACK
        if self.parser.current_token().type == Token.ASSIGN:  # initialization
            initialization_node = self.compile_array_assignment(token_id)
            code = initialization_node.get_code(self.current_stack_pointer()) + "<"  # discard expression value
        else:
            code = ""  # just array definition
            # no code is generated here. code was generated for defining this variable when we entered the scope
        self.parser.check_current_token_is(Token.SEMICOLON)
        self.parser.advance_token()  # skip SEMICOLON
        return code
    else:
        raise BFSyntaxError("Unexpected %s after %s" % (self.parser.next_token(), self.parser.current_token()))
def add_ids_map(self):
"""
the first cells are global variable cells (index 0 to n)
the next cell (index n+1) is the return_value cell
every function assumes that these cells exist
"""
next_available_cell = 0 if len(self.ids_map_list) == 0 else self.ids_map_list[0].next_available_cell
ids_map = namedtuple("ids_map", ["next_available_cell", "IDs_dict"])
ids_map.next_available_cell = next_available_cell
ids_map.IDs_dict = dict()
self.ids_map_list.insert(0, ids_map)
def remove_ids_map(self):
self.ids_map_list.pop(0)
def insert_to_ids_map(self, variable):
    """Register *variable* in the innermost scope and allocate its cell(s)."""
    innermost_scope = self.ids_map_list[0]
    self.check_id_doesnt_exist(variable.name)
    # the variable occupies cells starting at the current next-available index
    variable.cell_index = innermost_scope.next_available_cell
    innermost_scope.next_available_cell += get_variable_size(variable)
    innermost_scope.IDs_dict[variable.name] = variable
def reserve_cell_in_ids_map(self):
"""
reserve cell by increasing the "pointer" of the next available cell
this is used for making room for return_value cell
"""
ids_map = self.ids_map_list[0]
ids_map.next_available_cell += 1
def variables_dict_size(self, variables_dict_index):
    """Total number of cells occupied by all variables of the scope at the given ids_map_list index."""
    scope_variables = self.ids_map_list[variables_dict_index].IDs_dict.values()
    return sum(get_variable_size(v) for v in scope_variables)
def size_of_variables_current_scope(self):
    """Cells occupied by the innermost scope's variables (index 0 = head of the scope stack)."""
    return self.variables_dict_size(0)
def size_of_global_variables(self):
    """Cells occupied by the global variables (index -1 = outermost scope on the stack)."""
    return self.variables_dict_size(-1)
def increase_stack_pointer(self, amount=1):
# sometimes it is needed to increase the stack pointer
# for example, when compiling "if ... else ...", we need 2 temporary cells before the inner scope code of both the if and the else
# another example - when evaluating expression list in function call, each expression is evaluated while pointing to a different cell
# therefore, it is needed to "update" the stack pointer to represent the new pointer
self.ids_map_list[0].next_available_cell += amount
def decrease_stack_pointer(self, amount=1):
self.ids_map_list[0].next_available_cell -= amount
def set_stack_pointer(self, new_value):
assert new_value >= self.ids_map_list[0].next_available_cell
self.ids_map_list[0].next_available_cell = new_value
def current_stack_pointer(self):
return self.ids_map_list[0].next_available_cell
def insert_scope_variables_into_ids_map(self):
    """
    Scan the current scope's tokens (starting at LBRACE) for variable
    definitions, register them in the innermost ids map, and return BF code
    that advances the tape pointer past all of them.
    """
    # go through all the variable definitions in this scope (not including sub-scopes), and add them to the ids map
    # move the pointer to the next available cell (the one after the last variable declared in this scope)
    assert self.parser.current_token().type == Token.LBRACE
    self.parser.advance_token()
    i = self.parser.current_token_index
    while i < len(self.tokens):
        token = self.tokens[i]
        if token.type == Token.INT:
            if (
                self.tokens[i - 2].type != Token.FOR
            ):  # if it is not a definition inside a FOR statement (for (int i = 0...))
                variable = create_variable_from_definition(self.parser, index=i)
                self.insert_to_ids_map(variable)
        elif token.type == Token.LBRACE:
            i = self.parser.find_matching(starting_index=i)  # jump past the nested sub-scope
        elif token.type == Token.RBRACE:
            break  # we have reached the end of the scope
        i += 1
    return ">" * self.size_of_variables_current_scope()  # advance pointer to the next available cell
def enter_scope(self):
    """
    Open a new scope: push a fresh ids map, register the scope's variables,
    and return BF code that moves the pointer past them.
    """
    self.add_ids_map()
    return self.insert_scope_variables_into_ids_map()
def exit_scope(self):
    """
    Close the current scope: pop its ids map and return BF code that moves
    the pointer back to the previous scope's next available cell.
    """
    # compute the move-back code BEFORE popping the scope - it needs this scope's sizes
    move_back_code = "<" * self.size_of_variables_current_scope()
    self.remove_ids_map()
    return move_back_code
def enter_function_scope(self, parameters):
    """
    Open the function's top-level scope and return BF code that positions the
    pointer at the first free cell after the frame.
    """
    # make room for return_value cell
    # create an ids map to the current function scope
    # insert parameters into the ids map
    # insert scope variables into the ids map
    self.add_ids_map()
    for parameter in parameters:
        self.insert_to_ids_map(parameter)
    code = ">"  # skip return_value_cell
    code += self.insert_scope_variables_into_ids_map()
    # this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables
    return code
def check_id_doesnt_exist(self, ID):
# make sure that the id does not exist in the current scope
# used when defining a variable
if ID in self.ids_map_list[0].IDs_dict:
raise BFSemanticError("ID %s is already defined" % ID)
# =================
# compilation rules
# =================
# expression
def function_call(self):
    """
    Parse a call: ID LPAREN expression_list RPAREN.
    :return: NodeFunctionCall wrapping the callee and its argument nodes
    :raises BFSemanticError: on self-recursion (not supported)
    """
    # function_call: ID LPAREN expression_list RPAREN
    # returns NodeFunctionCall
    assert self.parser.current_token().type == Token.ID
    function_token = self.parser.current_token()
    function_name = function_token.data
    self.parser.advance_token()  # skip ID
    if function_name == self.name:
        raise BFSemanticError("No support for recursion yet :(.... in function call '%s'" % str(function_token))
    parameters = self.compile_expression_list()
    check_function_exists(function_token, len(parameters))
    function_to_call = get_function_object(function_name)
    return NodeFunctionCall(self.ids_map_list, function_to_call, parameters)
def literal(self):
    """
    Parse the highest-precedence grammar rule and return its parse-tree node.
    """
    # literal: NUM | CHAR | ID | ID (LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression )
    token = self.parser.current_token()
    if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN:
        return self.function_call()
    if (
        token.type == Token.ID and self.parser.next_token().type == Token.LBRACK
    ):  # array - ID(LBRACK expression RBRACK)+
        index_expression = self.get_array_index_expression()
        return NodeArrayGetElement(self.ids_map_list, token, index_expression)
    if is_token_literal(token) or token.type == Token.ID:
        self.parser.advance_token()
        return NodeToken(self.ids_map_list, token=token)
    if token.type != Token.LPAREN:
        raise BFSyntaxError(
            "Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))"
            % str(token)
        )
    # ( expression )
    self.parser.check_current_token_is(Token.LPAREN)
    self.parser.advance_token()  # skip LPAREN
    exp = self.expression()
    self.parser.check_current_token_is(Token.RPAREN)
    self.parser.advance_token()  # skip RPAREN
    return exp
def unary_postfix(self):
    """unary_postfix: literal ( ++ | -- | UNARY_MULTIPLICATIVE )?"""
    operand = self.literal()
    operator_token = self.parser.current_token()
    if operator_token.type not in (Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE):
        return operand  # no postfix operator follows
    self.parser.advance_token()  # consume the postfix operator
    return NodeUnaryPostfix(self.ids_map_list, operation=operator_token, literal=operand)
def unary_prefix(self):
    """
    Parse prefix operators and return the resulting node.
    Note: !, +, - recurse (they stack), while ++/--/~ apply directly to a literal.
    """
    # unary_prefix: ( (!|+|-)* unary_prefix ) | ( ( ++ | -- | UNARY_MULTIPLICATIVE | ~ ) literal ) | unary_postfix
    token = self.parser.current_token()
    if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
        if token.type == Token.BINOP and token.data not in ["+", "-"]:
            # only unary +/- are valid prefix BINOPs (e.g. not '*')
            raise BFSyntaxError(
                "Expected either + or - as unary prefix instead of token %s" % self.parser.current_token()
            )
        self.parser.advance_token()
        unary_prefix = self.unary_prefix()
        new_node = NodeUnaryPrefix(self.ids_map_list, operation=token, literal=unary_prefix)
        return new_node
    elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]:
        self.parser.advance_token()
        literal = self.literal()
        new_node = NodeUnaryPrefix(self.ids_map_list, operation=token, literal=literal)
        return new_node
    else:
        return self.unary_postfix()
def multiplicative(self):
    """multiplicative: unary_prefix ((MUL|DIV|MOD) unary_prefix)* - left-associative."""
    node = self.unary_prefix()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.BINOP or op.data not in ("*", "/", "%"):
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.unary_prefix())
def additive(self):
    """additive: multiplicative ((PLUS|MINUS) multiplicative)* - left-associative."""
    node = self.multiplicative()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.BINOP or op.data not in ("+", "-"):
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.multiplicative())
def shift(self):
    """shift: additive ((<<|>>) additive)* - left-associative."""
    node = self.additive()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.BITWISE_SHIFT:
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.additive())
def relational(self):
    """relational: shift (==|!=|<|>|<=|>= shift)? - at most one comparison, no chaining."""
    left = self.shift()
    op = self.parser.current_token()
    if op.type != Token.RELOP:
        return left  # just an arithmetic expression
    self.parser.advance_token()
    return NodeToken(self.ids_map_list, token=op, left=left, right=self.shift())
def bitwise_and(self):
    """bitwise_and: relational (& relational)* - left-associative."""
    node = self.relational()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.BITWISE_AND:
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.relational())
def bitwise_xor(self):
    # bitwise_xor: bitwise_and (^ bitwise_and)*
    n = self.bitwise_and()
    token = self.parser.current_token()
    while token is not None and token.type == Token.BITWISE_XOR:
        self.parser.advance_token()
        next_bitwise_and = self.bitwise_and()
        new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_bitwise_and)
        n = new_node
        token = self.parser.current_token()
    return n
def bitwise_or(self):
    """bitwise_or: bitwise_xor (| bitwise_xor)* - left-associative."""
    node = self.bitwise_xor()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.BITWISE_OR:
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.bitwise_xor())
def logical_and(self):
    """logical_and: bitwise_or (&& bitwise_or)* - left-associative."""
    node = self.bitwise_or()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.AND:
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.bitwise_or())
def logical_or(self):
    """logical_or: logical_and (|| logical_and)* - left-associative."""
    node = self.logical_and()
    while True:
        op = self.parser.current_token()
        if op is None or op.type != Token.OR:
            return node
        self.parser.advance_token()
        node = NodeToken(self.ids_map_list, token=op, left=node, right=self.logical_and())
def ternary_expression(self):
    """ternary_expression: logical_or (? expression : ternary_expression)? - right-associative."""
    condition = self.logical_or()
    if self.parser.current_token().type != Token.TERNARY:
        return condition
    self.parser.advance_token()  # consume '?'
    true_branch = self.expression()
    self.parser.check_current_token_is(Token.COLON)
    self.parser.advance_token()  # consume ':'
    false_branch = self.ternary_expression()
    return NodeTernary(self.ids_map_list, condition, true_branch, false_branch)
def assignment(self):
    """
    Parse the lowest-precedence rule. Distinguishes scalar assignment, whole-array
    assignment, element assignment, and falls through to ternary_expression.
    """
    # assignment: ID ASSIGN expression | ID ASSIGN ARRAY_INITIALIZATION | ID (LBRACK expression RBRACK)+ ASSIGN expression | ternary_expression
    if self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.ASSIGN:
        if self.parser.next_token(2).type in [Token.LBRACE, Token.STRING]:  # ID ASSIGN ARRAY_INITIALIZATION
            token_ID = self.parser.current_token()
            self.parser.advance_token()  # skip ID
            variable_ID = get_variable_from_ID_token(self.ids_map_list, token_ID)
            if not is_variable_array(variable_ID):
                raise BFSemanticError("Trying to assign array to non-array variable %s" % token_ID)
            return self.compile_array_assignment(token_ID)
        # ID ASSIGN expression
        id_token = self.parser.current_token()
        assign_token = self.parser.next_token()
        self.parser.advance_token(amount=2)  # skip ID ASSIGN
        expression_node = self.expression()
        new_node = NodeToken(
            self.ids_map_list,
            left=NodeToken(self.ids_map_list, token=id_token),
            token=assign_token,
            right=expression_node,
        )
        return new_node
    elif (
        self.parser.current_token().type == Token.ID
        and self.parser.next_token().type == Token.LBRACK
        and self.get_token_after_array_access().type == Token.ASSIGN
    ):
        # ID (LBRACK expression RBRACK)+ ASSIGN value_expression
        id_token = self.parser.current_token()
        index_expression = self.get_array_index_expression()
        self.parser.check_current_token_is(Token.ASSIGN)
        assign_token = self.parser.current_token()
        self.parser.advance_token()  # skip ASSIGN
        value_expression = self.expression()
        return NodeArraySetElement(self.ids_map_list, id_token, index_expression, assign_token, value_expression)
    else:
        return self.ternary_expression()
def expression(self):
    """expression: assignment - the entry point of the expression grammar."""
    return self.assignment()
def compile_expression(self):
    # parses mathematical expressions (+-*/ ())
    # increments/decrements (++, --)
    # relative operations (==, !=, <, >, <=, >=)
    # bitwise operations (|, &, ^, ~)
    # logical operations (!, &&, ||, ~)
    # ternary expression (?)
    # assignment (=, +=, -=, *=, /=, %=, <<=, >>=, &=, |=, ^=)
    # this is implemented using a Node class that represents a parse tree
    # returns BF code string; judging by compile_expression_as_statement, the generated
    # code leaves the pointer one past the cell holding the expression's value
    """
    (used reference: https://introcs.cs.princeton.edu/java/11precedence/)
    order of operations (lowest precedence to highest precedence)
    assignment (=, +=, -=, *=, /=, %=, <<=, >>=, &=, |=, ^=)
    ternary_expression (?)
    logical_or (||)
    logical_and (&&)
    bitwise_or (|)
    bitwise_xor (^)
    bitwise_and (&)
    bitwise_not (~)
    relational (==|!=|<|>|<=|>=)
    shift (<<|>>)
    additive (+-)
    multiplicative (*/%)
    unary_prefix (!, ++, --, ~)
    unary_postfix (++, --)
    expression: assignment
    assignment: ID (=|+=|-=|*=|/=|%=|<<=|>>=|&=|(|=)|^=) expression | ternary_expression
    ternary_expression: logical_or (? expression : ternary_expression)?
    logical_or: logical_and (|| logical_and)*
    logical_and: bitwise_or (&& bitwise_or)*
    bitwise_or: bitwise_xor (| bitwise_xor)*
    bitwise_xor: bitwise_and (^ bitwise_and)*
    bitwise_and: relational (& relational)*
    relational: shift (==|!=|<|>|<=|>= shift)?
    shift: additive ((<<|>>) additive)*
    additive: multiplicative ((PLUS|MINUS) multiplicative)*
    multiplicative: unary_prefix ((MUL|DIV|MOD) unary_prefix)*
    unary_prefix: ( (!|+|-)* unary_prefix ) | ( ( ++ | -- | ~ ) literal ) | unary_postfix
    unary_postfix: literal ( ++ | -- )?
    literal: NUM | CHAR | ID | ID[expression] | TRUE | FALSE | function_call | ( expression )
    """
    parse_tree = self.expression()
    expression_code = parse_tree.get_code(self.current_stack_pointer())
    return expression_code
# functions-related
def get_function_parameters_declaration(self):
    """
    Parse the parameter declaration list and return the parameters in
    declaration order. Only int (and int array) parameters are supported.
    :raises BFSemanticError: on a non-int parameter type
    """
    # parameters declaration: LPAREN (int ID (LBRACK NUM RBRACK)? (COMMA int ID)*)? RPAREN
    # return list of parameters (named tuples (type, ID)) at the same order as declared
    assert self.parser.current_token().type == Token.LPAREN
    self.parser.advance_token()
    res = list()
    token = self.parser.current_token()
    while token.type != Token.RPAREN:
        if token.type != Token.INT:
            raise BFSemanticError("Only int type is supported as a function parameter, and not '%s'" % str(token))
        parameter = create_variable_from_definition(self.parser, advance_tokens=True)
        res.append(parameter)
        if self.parser.current_token().type == Token.COMMA:
            self.parser.advance_token()
        else:
            self.parser.check_current_token_is(Token.RPAREN)
        token = self.parser.current_token()
    self.parser.advance_token()  # skip RPAREN
    return res
def compile_expression_list(self):
    """
    expression_list: ( expression (COMMA expression)* )?
    :return: list of parse-tree Nodes, one per argument expression
    """
    assert self.parser.current_token().type == Token.LPAREN
    self.parser.advance_token()  # consume LPAREN
    argument_nodes = []
    while self.parser.current_token().type != Token.RPAREN:
        argument_nodes.append(self.expression())
        if self.parser.current_token().type == Token.COMMA:
            self.parser.advance_token()  # consume COMMA, continue to the next argument
        else:
            self.parser.check_current_token_is(Token.RPAREN)  # anything else is a syntax error
    self.parser.advance_token()  # consume RPAREN
    return argument_nodes
def compile_return(self):
    """
    Compile "return;" or "return exp;" - evaluates the expression (if any)
    and moves its value into the caller-designated return_value cell.
    """
    # this assumes that the return is the last statement in the function
    self.parser.advance_token()  # skip return
    if self.parser.current_token().type == Token.SEMICOLON:
        # return;
        self.parser.advance_token()  # skip ;
        return ""  # nothing to do
    # return exp;
    expression_code = self.compile_expression()
    self.parser.check_current_token_is(Token.SEMICOLON)
    self.parser.advance_token()  # skip ;
    code = expression_code  # after this, we point to next available cell
    code += "<"  # point to value to return
    code += get_move_to_return_value_cell_code(self.return_value_cell, self.current_stack_pointer())
    return code
# statements
def compile_expression_as_statement(self):
    """
    Compile an expression used as a standalone statement, e.g. "x += 5;",
    "x++;" or "++x;". The expression's value is computed and then discarded.
    Consumes the trailing semicolon.
    """
    statement_starters = (Token.ID, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE)
    assert self.parser.current_token().type in statement_starters
    code = self.compile_expression()
    self.parser.check_current_token_is(Token.SEMICOLON)
    self.parser.advance_token()  # consume ';'
    return code + "<"  # '<' discards the expression's value
def compile_print_string(self):
    """Compile the statement: print("string literal"); - returns BF code printing the literal."""
    self.parser.check_next_tokens_are([Token.LPAREN, Token.STRING, Token.RPAREN, Token.SEMICOLON])
    self.parser.advance_token(amount=2)  # move past: print (
    text = self.parser.current_token().data
    self.parser.advance_token(amount=3)  # move past: string ) ;
    return get_print_string_code(text)
def compile_function_call_statement(self):
    """Compile the statement: function_call SEMICOLON - the call's return value is discarded."""
    call_node = self.function_call()
    call_code = call_node.get_code(current_pointer=self.current_stack_pointer())
    self.parser.check_current_token_is(Token.SEMICOLON)
    self.parser.advance_token()  # consume ';'
    # call_code leaves the pointer one past the return value; '<' discards it
    return call_code + "<"
def compile_if(self):
    """
    Compile: if (expression) statement (else statement)?
    Uses 2 temp cells (expression, execute_else) laid out just before the
    branch bodies' cells; pointer choreography in the emitted BF is exact.
    """
    # if (expression) statement (else statement)? note - statement can be scope { }
    self.parser.check_next_token_is(Token.LPAREN)
    self.parser.advance_token(amount=2)  # skip to after LPAREN
    expression_code = self.compile_expression()
    self.parser.check_current_token_is(Token.RPAREN)
    self.parser.advance_token()  # point to after RPAREN
    # if ... (else ...)?
    # need to use 2 temp cells
    # expression, execute_else
    self.increase_stack_pointer(amount=2)
    inside_if_code = self.compile_statement()
    have_else = self.parser.current_token().type == Token.ELSE
    if have_else:
        self.parser.advance_token()  # skip the 'else'
        inside_else_code = self.compile_statement()
    self.decrease_stack_pointer(amount=2)
    code = expression_code  # evaluate expression. after this we point to "execute_else" cell
    if have_else:
        code += "[-]+"  # execute_else = 1
    code += "<"  # point to the expression
    code += "["  # if it is non-zero
    code += ">"  # point to execute_else
    if have_else:
        code += "-"  # execute_else = 0
    code += ">"  # point to next available cell
    code += inside_if_code  # after this we point to the same cell (one after execute_else)
    code += "<<"  # point to expression
    code += "[-]"  # expression = 0
    code += "]"  # end if
    # now we point to next available cell (what used to be expression_code)
    if have_else:
        code += ">"  # point to execute_else
        code += "["  # if it is non-zero
        code += ">"  # point to next available cell
        code += inside_else_code  # after this we point to the same cell (one after execute_else)
        code += "<"  # point to execute_else
        code += "-"  # execute_else = 0
        code += "]"  # end if
        code += "<"  # point to next available cell (what used to be expression_code)
    return code
def compile_while(self):  # while (expression) statement note - statement can be scope { }
    """
    Compile a while loop: the condition is evaluated before the first
    iteration and re-evaluated after each pass through the body.
    """
    self.parser.check_next_token_is(Token.LPAREN)
    self.parser.advance_token(amount=2)  # skip to after LPAREN
    expression_code = self.compile_expression()
    self.parser.check_current_token_is(Token.RPAREN)
    self.parser.advance_token()  # point to after RPAREN
    inner_scope_code = self.compile_statement()
    code = expression_code  # evaluate expression
    code += "<"  # point to the expression
    code += "["  # if it is 0, jump to after the <while> scope
    code += inner_scope_code  # <while> scope code. after this code, pointer points to the next available cell. i.e one after the expression
    code += expression_code  # re-evaluate the expression
    code += "<"  # point to the expression
    code += "]"  # after <while> scope
    return code
def compile_do_while(self):  # do statement while (expression) semicolon note - statement can be scope { }
    """
    Compile a do-while loop: the body runs at least once, so the loop flag
    is force-set to 1 before entering the BF loop.
    """
    self.parser.check_current_token_is(Token.DO)
    self.parser.advance_token()
    inner_scope_code = self.compile_statement()
    self.parser.check_current_tokens_are([Token.WHILE, Token.LPAREN])
    self.parser.advance_token(amount=2)  # point to after LPAREN
    expression_code = self.compile_expression()
    self.parser.check_current_tokens_are([Token.RPAREN, Token.SEMICOLON])
    self.parser.advance_token(amount=2)  # point to after SEMICOLON
    code = "[-]+"  # set expression to 1. since do while loops executes the scope code first.
    code += "["  # go in scope
    code += inner_scope_code  # <do-while> scope code. after this code, pointer points to the same cell. i.e the expression
    code += expression_code  # evaluate the expression, after this code, the pointer is pointing to the next cell
    code += "<"  # point to the expression
    code += "]"  # after <do-while> scope
    return code
def compile_switch(self):  # switch (expression) { ((default | case literal): statements* break;? statements*)* }
    """Compile a switch statement.

    Collects every case into a (value, code, has_break) tuple and delegates the
    actual Brainfuck generation to process_switch_cases().
    Reserves two temporary stack cells for the duration of the statement:
    one before evaluating the switch expression, and one flag cell used to
    indicate that a matching case should be executed.
    Raises BFSemanticError on duplicate case values / duplicate default,
    and BFSyntaxError on unexpected tokens inside the switch body.
    """
    self.parser.check_current_tokens_are([Token.SWITCH, Token.LPAREN])
    self.parser.advance_token(amount=2)  # point to after LPAREN
    self.increase_stack_pointer()  # use 1 temp cell before evaluating the expression
    expression_code = self.compile_expression()
    self.parser.check_current_tokens_are([Token.RPAREN, Token.LBRACE])
    self.parser.advance_token(amount=2)  # point to after LBRACE
    self.increase_stack_pointer()  # use 1 additional temp cell for indicating we need to execute a case
    cases = list()  # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool))
    while self.parser.current_token().type in [
        Token.CASE,
        Token.DEFAULT,
    ]:  # (default | CASE literal) COLON statement* break;? statements*
        if self.parser.current_token().type == Token.CASE:
            self.parser.advance_token()  # skip CASE
            constant_value_token = self.parser.current_token()
            if not is_token_literal(constant_value_token):
                raise BFSemanticError("Switch case value is not a literal. Token is %s" % constant_value_token)
            value = get_literal_token_value(constant_value_token)
            # duplicate case values would make the match ambiguous
            if value in [case for (case, _, _) in cases]:
                raise BFSemanticError("Case %d already exists. Token is %s" % (value, constant_value_token))
        else:
            assert self.parser.current_token().type == Token.DEFAULT
            value = "default"
            if value in [case for (case, _, _) in cases]:
                raise BFSemanticError("default case %s already exists." % self.parser.current_token())
        self.parser.check_next_token_is(Token.COLON)
        self.parser.advance_token(amount=2)  # point to after COLON
        inner_case_code = ""
        while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]:
            inner_case_code += self.compile_statement(
                allow_declaration=False
            )  # not allowed to declare variables directly inside case
        has_break = False
        if self.parser.current_token().type == Token.BREAK:  # ignore all statements after break
            self.parser.check_next_token_is(Token.SEMICOLON)
            self.parser.advance_token(amount=2)  # skip break SEMICOLON
            has_break = True
            # statements after a break are unreachable: parse them for validity
            # but throw their generated code away
            while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
                self.compile_statement()  # advance the parser and discard the code
        cases.append((value, inner_case_code, has_break))
        if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
            raise BFSyntaxError(
                "Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token()
            )
    self.parser.check_current_token_is(Token.RBRACE)
    self.parser.advance_token()
    self.decrease_stack_pointer(amount=2)  # release the two temp cells reserved above
    return process_switch_cases(expression_code, cases)
def compile_break(self):
    """Reject a break statement that is not handled by compile_switch.

    break is only supported at the top level of a switch body (where
    compile_switch consumes it); reaching this method means it appeared
    somewhere unsupported.
    """
    # TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while
    offending_token = self.parser.current_token()
    message = (
        "Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s"
        % offending_token
    )
    raise NotImplementedError(message)
def compile_for(self):
    # for (statement expression; expression) inner_scope_code note: statement contains ;, and inner_scope_code can be scope { }
    # (the statement/second expression/inner_scope_code can be empty)
    # (the statement cannot contain scope - { and } )
    """
    <for> is a special case of scope
    the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
    so we manually compile the scope instead of using self.compile_scope():
    we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
    we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
    (this needs to be done before the <for> definition's statement)
    next, inside the for's scope {}:
    after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
    after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
    finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
    """
    self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN])
    self.parser.advance_token(amount=2)  # skip for (
    manually_inserted_variable_in_for_definition = False
    variable = None
    code = ""
    # =============== enter FOR scope ===============
    self.add_ids_map()
    # ===============================================
    if self.parser.current_token().type == Token.INT:
        # we are defining a variable inside the for statement definition (for (int i = 0....))
        variable = create_variable_from_definition(self.parser, advance_tokens=False)
        self.insert_to_ids_map(variable)
        manually_inserted_variable_in_for_definition = True
        code += ">" * get_variable_size(variable)  # make room for the loop variable on the tape
        # warn when the declared loop variable is never assigned (e.g. "for (int i; ...")
        show_side_effect_warning = self.parser.next_token(2).type != Token.ASSIGN
        if self.parser.next_token(2).type == Token.LBRACK:
            # array declaration: the ASSIGN (if any) comes after the [..] indices
            show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN
        if show_side_effect_warning:
            print(
                "[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects"
                % self.parser.next_token()
            )
    if self.parser.current_token().type == Token.LBRACE:  # statement is a scope
        raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token())
    initial_statement = self.compile_statement()  # consumes its own trailing ;
    condition_expression = self.compile_expression()
    self.parser.check_current_token_is(Token.SEMICOLON)
    self.parser.advance_token()  # skip ;
    if self.parser.current_token().type == Token.RPAREN:
        modification_expression = ""  # no modification expression
    else:
        modification_expression = self.compile_expression()
        modification_expression += "<"  # discard expression value
    self.parser.check_current_token_is(Token.RPAREN)
    self.parser.advance_token()  # skip )
    inner_scope_code = ""
    if self.parser.current_token().type == Token.LBRACE:  # do we have {} as for's statement?
        # compiling <for> scope inside { }:
        if manually_inserted_variable_in_for_definition:
            # step back over the loop variable: insert_scope_variables_into_ids_map
            # will count it too and move the pointer past it again
            inner_scope_code += "<" * get_variable_size(variable)
        inner_scope_code += self.insert_scope_variables_into_ids_map()
        inner_scope_code += self.compile_scope_statements()
    else:
        inner_scope_code += self.compile_statement()
    # =============== exit FOR scope ===============
    inner_scope_code += self.exit_scope()
    if manually_inserted_variable_in_for_definition:
        # exit_scope discarded the loop variable too; step forward so it
        # survives until the loop itself is done
        inner_scope_code += ">" * get_variable_size(variable)
    # ==============================================
    code += initial_statement
    code += condition_expression  # evaluate expression
    code += "<"  # point to the expression
    code += "["  # if it is 0, jump to after the <for> scope
    code += inner_scope_code  # <for> scope code
    code += modification_expression
    code += condition_expression  # re-evaluate the expression
    code += "<"  # point to the expression
    code += "]"  # after <for> scope
    if manually_inserted_variable_in_for_definition:
        code += "<" * get_variable_size(variable)  # finally discard the manually defined loop variable
    return code
def compile_statement(self, allow_declaration=True):
    """Compile one statement and return its code.

    When the generated code finishes executing, the tape pointer is back at
    the cell it pointed to before the statement ran.
    allow_declaration=False forbids variable declarations (used directly
    inside switch cases).
    """
    token = self.parser.current_token()
    kind = token.type

    # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON
    if kind == Token.INT:
        if not allow_declaration:
            raise BFSemanticError(
                "Cannot define variable (%s) directly inside case. "
                "Can define inside new scope {} or outside the switch statement" % token
            )
        return self.compile_variable_declaration()

    # prefix modification, e.g. ++ID;
    if kind in (Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE):
        return self.compile_expression_as_statement()

    if kind == Token.ID:
        following = self.parser.next_token().type
        if following in (
            Token.ASSIGN,
            Token.LBRACK,
            Token.INCREMENT,
            Token.DECREMENT,
            Token.UNARY_MULTIPLICATIVE,
        ):
            # ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++;
            return self.compile_expression_as_statement()
        if following == Token.LPAREN:  # ID(...); (function call)
            return self.compile_function_call_statement()
        raise BFSyntaxError(
            "Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)"
            % (str(self.parser.next_token()), str(token))
        )

    if kind == Token.SEMICOLON:
        # empty statement
        self.parser.advance_token()  # skip ;
        return ""

    if kind in (Token.CASE, Token.DEFAULT):
        raise BFSyntaxError("%s not inside a switch statement" % token)

    # statements that map one-to-one onto a dedicated compile method
    dispatch = {
        Token.PRINT: self.compile_print_string,
        Token.IF: self.compile_if,
        Token.LBRACE: self.compile_scope,
        Token.WHILE: self.compile_while,
        Token.DO: self.compile_do_while,
        Token.SWITCH: self.compile_switch,
        Token.BREAK: self.compile_break,
        Token.RETURN: self.compile_return,
        Token.FOR: self.compile_for,
    }
    handler = dispatch.get(kind)
    if handler is not None:
        return handler()

    raise BFSyntaxError("Invalid statement at " + str(token))
def compile_scope_statements(self):
    """Compile statements until the scope's closing RBRACE, which is consumed.

    Returns the concatenated code of every statement in the scope.
    Raises BFSyntaxError if the token stream ends before the scope closes.
    """
    tokens = self.tokens
    pieces = []
    while self.parser.current_token() is not None:
        if self.parser.current_token().type == Token.RBRACE:
            # reached the end of our scope
            self.parser.advance_token()  # skip RBRACE
            return "".join(pieces)
        pieces.append(self.compile_statement())
    # fell off the end of the token stream without seeing the closing brace
    raise BFSyntaxError("expected } after the last token in scope " + str(tokens[-1]))
def compile_scope(self):
    """Compile a { ... } block: open a new scope, compile its statements, close it."""
    assert self.parser.current_token().type == Token.LBRACE
    enter = self.enter_scope()
    statements = self.compile_scope_statements()
    leave = self.exit_scope()
    return enter + statements + leave
def compile_function_scope(self, parameters):
    """Compile the body of the current function and return its code.

    parameters -- list of the function's parameters in declaration order;
    they are inserted into the fresh function scope before compilation.

    Calling convention (see the tape layout example below):
        caller: makes room for the return value (zeroes that cell), places
                the parameter values right after it, and points at the
                return-value cell before jumping in.
        callee: may assume the pointer starts on the zeroed return-value
                cell, with the parameter cells immediately following (so the
                ids map starts at current_stack_pointer + 1). It must leave
                the return value in that cell and point at it on exit,
                which effectively cleans up the parameters.

    Example layout for
        int global_var1; int global_var2;
        int foo(int a, int b) { int x; int y; return 5; }
        int main() { int n; foo(1, 2); }
    the tape looks like:
        global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
    """
    assert self.parser.current_token().type == Token.LBRACE
    body = self.enter_function_scope(parameters)
    body += self.compile_scope_statements()
    body += self.exit_scope()
    body += "<"  # point to return_value_cell
    return body