mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
1247 lines
52 KiB
Python
1247 lines
52 KiB
Python
from collections import namedtuple
|
|
from functools import reduce
|
|
|
|
from .Exceptions import BFSemanticError, BFSyntaxError
|
|
from .Functions import check_function_exists, get_function_object
|
|
from .General import (
|
|
get_literal_token_value,
|
|
get_move_to_return_value_cell_code,
|
|
get_print_string_code,
|
|
get_variable_dimensions_from_token,
|
|
get_variable_from_ID_token,
|
|
is_token_literal,
|
|
process_switch_cases,
|
|
)
|
|
from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array
|
|
from .Node import (
|
|
NodeArrayAssignment,
|
|
NodeArrayGetElement,
|
|
NodeArraySetElement,
|
|
NodeFunctionCall,
|
|
NodeTernary,
|
|
NodeToken,
|
|
NodeUnaryPostfix,
|
|
NodeUnaryPrefix,
|
|
)
|
|
from .Parser import Parser
|
|
from .Token import Token
|
|
|
|
"""
|
|
This file implements the FunctionCompiler object
|
|
This is where we actually compile the code - statements, assignments, calculations, etc
|
|
The syntax of the language is defined here as compilation rules
|
|
|
|
A function is position-dependent - it needs to know where on the tape it runs
|
|
So that it can access global variables, which are at the beginning of the stack, correctly
|
|
Because of that, the function's code is dependent on when/where we call it
|
|
The idea is that we compile the function on demand - every time it is called
|
|
And every time we compile it - we pass the current stack pointer to it
|
|
This is implemented in the get_code() function
|
|
|
|
The FunctionCompiler object holds tokens that correspond to the function so that we can compile it on demand
|
|
"""
|
|
|
|
|
|
class FunctionCompiler:
|
|
def __init__(self, name, tokens):
|
|
self.name = name
|
|
self.tokens = tokens
|
|
self.parser = Parser(self.tokens)
|
|
self.ids_map_list = list()
|
|
self.type = None
|
|
self.parameters = None
|
|
self.process_function_definition() # sets type and parameters
|
|
self.return_value_cell = None # will be set on every call to this function
|
|
|
|
"""
|
|
ids_map_list is a list of named tuples. Each tuple represents a scope, and holds 2 items:
|
|
1. an index of the next available cell. (if we want to insert a new ID to the ids_map_list, it will be in that index)
|
|
2. a dictionary that maps an ID (string) to an index - the cell where we hold that variable
|
|
|
|
We use this list as a stack:
|
|
when entering a scope, we insert a (available_cell, dictionary) to the BEGINNING of the list
|
|
when exiting a scope, we pop the last inserted tuple (the one at the BEGINNING of the list)
|
|
|
|
When declaring a variable in the current scope, we add it to the dictionary at the beginning of the list,
|
|
and increase the 'next_available_cell' by 1
|
|
When retrieving a variable, we go through the list and return the first occurrence that matches the ID
|
|
"""
|
|
|
|
def process_function_definition(self):
|
|
# sets function type and parameters, advances parser
|
|
|
|
function_return_type = self.parser.current_token()
|
|
self.parser.advance_token() # skip return type
|
|
function_name = self.parser.current_token().data
|
|
assert function_name == self.name
|
|
self.parser.advance_token() # skip ID
|
|
parameters = self.get_function_parameters_declaration()
|
|
# parser now points to LBRACE = beginning of function scope
|
|
|
|
self.type = function_return_type
|
|
self.parameters = parameters
|
|
|
|
def get_code(self, current_stack_pointer):
|
|
"""
|
|
layout:
|
|
current_stack_pointer -------
|
|
|
|
|
v
|
|
global1 global2 unknown1 unknown2 my_return_value param1 param2 local1 local2
|
|
|
|
current_stack_pointer is current next available cell
|
|
which is the value of the caller's current_stack_pointer plus this function's parameters
|
|
create ids map for global variables
|
|
make room for return_value
|
|
"""
|
|
self.insert_global_variables_to_function_scope()
|
|
|
|
# self.current_stack_pointer is now equal to the size of the global variables plus 1 (next_available_cell)
|
|
# new stack pointer should be at least that size
|
|
assert self.current_stack_pointer() <= current_stack_pointer
|
|
self.return_value_cell = current_stack_pointer
|
|
self.set_stack_pointer(
|
|
current_stack_pointer + 1
|
|
) # make room for return_value cell. next available cell is the next one after it.
|
|
function_code = self.compile_function_scope(self.parameters)
|
|
self.remove_ids_map() # Global variables
|
|
return function_code
|
|
|
|
# =================
|
|
# helper functions
|
|
# =================
|
|
def insert_global_variables_to_function_scope(self):
|
|
self.add_ids_map()
|
|
for variable in get_global_variables():
|
|
self.insert_to_ids_map(variable)
|
|
|
|
def get_array_index_expression(self):
|
|
"""
|
|
the idea - address the multi-dimensional array as a one-dimensional array
|
|
calculate the appropriate index in the one-dimensional array
|
|
by multiplying the index in each dimension by its size (i.e the multiplication of all sizes of the following dimensions)
|
|
and then using the NodeArrayGetElement/NodeArraySetElement class which gets an element in a one-dimensional array
|
|
|
|
in order to do that, we need to create our own sub-tree of multiplications,
|
|
and pass it as the "index expression"
|
|
|
|
e.g if the array is: arr[10][5][2] and we want to get arr[4][3][1]
|
|
then we want to calculate index = (4*(5*2) + 3*(2) + 1)
|
|
"""
|
|
ID_token = self.parser.current_token()
|
|
self.parser.advance_token(2) # skip ID, LBRACK
|
|
first_index_expression = index_expression = self.expression() # first dimension
|
|
self.parser.check_current_token_is(Token.RBRACK)
|
|
self.parser.advance_token() # skip RBRACK
|
|
|
|
# now handle the next dimensions (if multi-dimensional array)
|
|
dimensions = get_variable_dimensions_from_token(self.ids_map_list, ID_token)
|
|
if len(dimensions) > 1:
|
|
multiply_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="*")
|
|
add_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="+")
|
|
|
|
# multiply by next dimensions sizes
|
|
multiply_amount = reduce(lambda x, y: x * y, dimensions[1:]) # size of the following dimensions
|
|
node_token_multiply_amount = NodeToken(
|
|
self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))
|
|
)
|
|
index_expression = NodeToken(
|
|
self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount
|
|
)
|
|
|
|
# handle next dimensions
|
|
dimension = 1
|
|
while dimension < len(dimensions):
|
|
if self.parser.current_token().type != Token.LBRACK: # too few indexes given...
|
|
if dimension == 1:
|
|
return first_index_expression # allow use of only one dimension for multi-dimensional array
|
|
raise BFSemanticError(
|
|
"%s is a %s-dimensional array, but only %s dimension(s) given as index"
|
|
% (str(ID_token), len(dimensions), dimension)
|
|
)
|
|
self.parser.check_current_token_is(Token.LBRACK)
|
|
self.parser.advance_token() # skip LBRACK
|
|
exp = self.expression()
|
|
|
|
self.parser.check_current_token_is(Token.RBRACK)
|
|
self.parser.advance_token() # skip RBRACK
|
|
|
|
# current_dimension_index *= size_of_following_dimensions
|
|
if dimension + 1 < len(dimensions): # not last dimension - need to multiply and add
|
|
multiply_amount = reduce(
|
|
lambda x, y: x * y, dimensions[dimension + 1 :]
|
|
) # size of the following dimensions
|
|
node_token_multiply_amount = NodeToken(
|
|
self.ids_map_list,
|
|
token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)),
|
|
)
|
|
multiply_node = NodeToken(
|
|
self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount
|
|
)
|
|
|
|
# prev_dimensions_index += current_dimension_index
|
|
index_expression = NodeToken(
|
|
self.ids_map_list, token=add_token, left=index_expression, right=multiply_node
|
|
)
|
|
else: # last dimension - no need to multiply, just add
|
|
index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp)
|
|
dimension += 1
|
|
|
|
if self.parser.current_token().type == Token.LBRACK: # too many indexes given...
|
|
raise BFSemanticError(
|
|
"%s is a %s-dimensional array. Unexpected %s"
|
|
% (str(ID_token), len(dimensions), self.parser.current_token())
|
|
)
|
|
return index_expression
|
|
|
|
def get_token_after_array_access(self, offset=0):
|
|
# in case we have: "ID[a][b][c]...[z] next_token", return "next_token"
|
|
idx = self.parser.current_token_index + offset
|
|
self.parser.check_next_tokens_are([Token.ID, Token.LBRACK], starting_index=idx - 1)
|
|
idx += 1 # point to LBRACK
|
|
while self.parser.token_at_index(idx).type == Token.LBRACK:
|
|
idx = self.parser.find_matching(idx) # point to RBRACK
|
|
idx += 1 # advance to one after the RBRACK
|
|
|
|
return self.parser.token_at_index(idx)
|
|
|
|
def compile_array_assignment(self, token_id):
|
|
# int id[a][b][c]... = {1, 2, 3, ...};
|
|
# or int id[a][b][c]... = "\1\2\3...";
|
|
# or int id[a][b][c]... = {{1, 2}, {3, 4}, ...};
|
|
# or array assignment: id = {1, 2, 3, ...};
|
|
self.parser.check_current_token_is(Token.ASSIGN)
|
|
if self.parser.current_token().data != "=":
|
|
raise BFSyntaxError("Unexpected %s when assigning array. Expected ASSIGN (=)" % self.parser.current_token())
|
|
|
|
if self.parser.next_token().type not in [Token.LBRACE, Token.STRING]:
|
|
raise BFSyntaxError("Expected LBRACE or STRING at '%s'" % self.parser.next_token())
|
|
|
|
self.parser.advance_token() # skip to LBRACE or STRING
|
|
literal_tokens_list = self.parser.compile_array_initialization_list()
|
|
|
|
return NodeArrayAssignment(self.ids_map_list, token_id, literal_tokens_list)
|
|
|
|
def compile_variable_declaration(self):
|
|
self.parser.check_next_token_is(Token.ID)
|
|
self.parser.advance_token() # skip "INT" (now points to ID)
|
|
assert self.parser.current_token().type == Token.ID
|
|
|
|
if self.parser.next_token().type == Token.SEMICOLON: # INT ID SEMICOLON
|
|
self.parser.advance_token(2) # skip ID SEMICOLON
|
|
return (
|
|
"" # no code is generated here. code was generated for defining this variable when we entered the scope
|
|
)
|
|
|
|
elif (
|
|
self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "="
|
|
): # INT ID = EXPRESSION SEMICOLON
|
|
return self.compile_expression_as_statement() # compile_expression_as_statement skips the SEMICOLON
|
|
|
|
elif (
|
|
self.parser.next_token().type == Token.LBRACK
|
|
): # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
|
|
# array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};)
|
|
token_id = self.parser.current_token()
|
|
self.parser.advance_token() # skip ID
|
|
while self.parser.current_token().type == Token.LBRACK: # loop to skip to after last RBRACK ]
|
|
self.parser.check_current_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK])
|
|
self.parser.advance_token(3) # skip LBRACK, NUM, RBRACK
|
|
|
|
if self.parser.current_token().type == Token.ASSIGN: # initialization
|
|
initialization_node = self.compile_array_assignment(token_id)
|
|
code = initialization_node.get_code(self.current_stack_pointer()) + "<" # discard expression value
|
|
else:
|
|
code = "" # just array definition
|
|
# no code is generated here. code was generated for defining this variable when we entered the scope
|
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
|
self.parser.advance_token() # skip SEMICOLON
|
|
return code
|
|
else:
|
|
raise BFSyntaxError("Unexpected %s after %s" % (self.parser.next_token(), self.parser.current_token()))
|
|
|
|
def add_ids_map(self):
|
|
"""
|
|
the first cells are global variable cells (index 0 to n)
|
|
the next cell (index n+1) is the return_value cell
|
|
every function assumes that these cells exist
|
|
"""
|
|
|
|
next_available_cell = 0 if len(self.ids_map_list) == 0 else self.ids_map_list[0].next_available_cell
|
|
|
|
ids_map = namedtuple("ids_map", ["next_available_cell", "IDs_dict"])
|
|
ids_map.next_available_cell = next_available_cell
|
|
ids_map.IDs_dict = dict()
|
|
|
|
self.ids_map_list.insert(0, ids_map)
|
|
|
|
def remove_ids_map(self):
|
|
self.ids_map_list.pop(0)
|
|
|
|
def insert_to_ids_map(self, variable):
|
|
ids_map = self.ids_map_list[0]
|
|
|
|
self.check_id_doesnt_exist(variable.name)
|
|
|
|
variable.cell_index = ids_map.next_available_cell
|
|
ids_map.next_available_cell += get_variable_size(variable)
|
|
ids_map.IDs_dict[variable.name] = variable
|
|
|
|
def reserve_cell_in_ids_map(self):
|
|
"""
|
|
reserve cell by increasing the "pointer" of the next available cell
|
|
this is used for making room for return_value cell
|
|
"""
|
|
ids_map = self.ids_map_list[0]
|
|
ids_map.next_available_cell += 1
|
|
|
|
def variables_dict_size(self, variables_dict_index):
|
|
variables_dict = self.ids_map_list[variables_dict_index].IDs_dict
|
|
|
|
size = 0
|
|
for variable in variables_dict.values():
|
|
size += get_variable_size(variable)
|
|
|
|
return size
|
|
|
|
def size_of_variables_current_scope(self):
|
|
return self.variables_dict_size(0)
|
|
|
|
def size_of_global_variables(self):
|
|
return self.variables_dict_size(-1)
|
|
|
|
def increase_stack_pointer(self, amount=1):
|
|
# sometimes it is needed to increase the stack pointer
|
|
# for example, when compiling "if ... else ...", we need 2 temporary cells before the inner scope code of both the if and the else
|
|
# another example - when evaluating expression list in function call, each expression is evaluated while pointing to a different cell
|
|
# therefore, it is needed to "update" the stack pointer to represent the new pointer
|
|
self.ids_map_list[0].next_available_cell += amount
|
|
|
|
def decrease_stack_pointer(self, amount=1):
|
|
self.ids_map_list[0].next_available_cell -= amount
|
|
|
|
def set_stack_pointer(self, new_value):
|
|
assert new_value >= self.ids_map_list[0].next_available_cell
|
|
self.ids_map_list[0].next_available_cell = new_value
|
|
|
|
def current_stack_pointer(self):
|
|
return self.ids_map_list[0].next_available_cell
|
|
|
|
def insert_scope_variables_into_ids_map(self):
|
|
# go through all the variable definitions in this scope (not including sub-scopes), and add them to the ids map
|
|
# move the pointer to the next available cell (the one after the last variable declared in this scope)
|
|
|
|
assert self.parser.current_token().type == Token.LBRACE
|
|
self.parser.advance_token()
|
|
|
|
i = self.parser.current_token_index
|
|
while i < len(self.tokens):
|
|
token = self.tokens[i]
|
|
|
|
if token.type == Token.INT:
|
|
if (
|
|
self.tokens[i - 2].type != Token.FOR
|
|
): # if it is not a definition inside a FOR statement (for (int i = 0...))
|
|
variable = create_variable_from_definition(self.parser, index=i)
|
|
self.insert_to_ids_map(variable)
|
|
|
|
elif token.type == Token.LBRACE:
|
|
i = self.parser.find_matching(starting_index=i)
|
|
|
|
elif token.type == Token.RBRACE:
|
|
break # we have reached the end of the scope
|
|
|
|
i += 1
|
|
|
|
return ">" * self.size_of_variables_current_scope() # advance pointer to the next available cell
|
|
|
|
def enter_scope(self):
|
|
# create an ids map to the current scope, and then inserts the scope variables into it
|
|
self.add_ids_map()
|
|
return self.insert_scope_variables_into_ids_map()
|
|
|
|
def exit_scope(self):
|
|
# remove the ids map of the current scope
|
|
# return pointer to the previous scope's next available cell
|
|
code = "<" * self.size_of_variables_current_scope()
|
|
self.remove_ids_map()
|
|
return code
|
|
|
|
def enter_function_scope(self, parameters):
|
|
# make room for return_value cell
|
|
# create an ids map to the current function scope
|
|
# insert parameters into the ids map
|
|
# insert scope variables into the ids map
|
|
|
|
self.add_ids_map()
|
|
for parameter in parameters:
|
|
self.insert_to_ids_map(parameter)
|
|
|
|
code = ">" # skip return_value_cell
|
|
code += self.insert_scope_variables_into_ids_map()
|
|
# this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables
|
|
|
|
return code
|
|
|
|
def check_id_doesnt_exist(self, ID):
|
|
# make sure that the id does not exist in the current scope
|
|
# used when defining a variable
|
|
if ID in self.ids_map_list[0].IDs_dict:
|
|
raise BFSemanticError("ID %s is already defined" % ID)
|
|
|
|
# =================
|
|
# compilation rules
|
|
# =================
|
|
|
|
# expression
|
|
def function_call(self):
|
|
# function_call: ID LPAREN expression_list RPAREN
|
|
# returns NodeFunctionCall
|
|
assert self.parser.current_token().type == Token.ID
|
|
|
|
function_token = self.parser.current_token()
|
|
function_name = function_token.data
|
|
self.parser.advance_token() # skip ID
|
|
|
|
if function_name == self.name:
|
|
raise BFSemanticError("No support for recursion yet :(.... in function call '%s'" % str(function_token))
|
|
|
|
parameters = self.compile_expression_list()
|
|
|
|
check_function_exists(function_token, len(parameters))
|
|
function_to_call = get_function_object(function_name)
|
|
|
|
return NodeFunctionCall(self.ids_map_list, function_to_call, parameters)
|
|
|
|
def literal(self):
|
|
# literal: NUM | CHAR | ID | ID (LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression )
|
|
|
|
token = self.parser.current_token()
|
|
|
|
if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN:
|
|
return self.function_call()
|
|
|
|
if (
|
|
token.type == Token.ID and self.parser.next_token().type == Token.LBRACK
|
|
): # array - ID(LBRACK expression RBRACK)+
|
|
index_expression = self.get_array_index_expression()
|
|
return NodeArrayGetElement(self.ids_map_list, token, index_expression)
|
|
|
|
if is_token_literal(token) or token.type == Token.ID:
|
|
self.parser.advance_token()
|
|
return NodeToken(self.ids_map_list, token=token)
|
|
|
|
if token.type != Token.LPAREN:
|
|
raise BFSyntaxError(
|
|
"Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))"
|
|
% str(token)
|
|
)
|
|
|
|
# ( expression )
|
|
self.parser.check_current_token_is(Token.LPAREN)
|
|
self.parser.advance_token() # skip LPAREN
|
|
exp = self.expression()
|
|
self.parser.check_current_token_is(Token.RPAREN)
|
|
self.parser.advance_token() # skip RPAREN
|
|
|
|
return exp
|
|
|
|
def unary_postfix(self):
|
|
# unary_postfix: literal ( ++ | -- | UNARY_MULTIPLICATIVE)?
|
|
|
|
literal = self.literal()
|
|
token = self.parser.current_token()
|
|
|
|
if token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]:
|
|
self.parser.advance_token()
|
|
new_node = NodeUnaryPostfix(self.ids_map_list, operation=token, literal=literal)
|
|
return new_node
|
|
else:
|
|
return literal
|
|
|
|
def unary_prefix(self):
|
|
# unary_prefix: ( (!|+|-)* unary_prefix ) | ( ( ++ | -- | UNARY_MULTIPLICATIVE | ~ ) literal ) | unary_postfix
|
|
|
|
token = self.parser.current_token()
|
|
|
|
if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
|
|
if token.type == Token.BINOP and token.data not in ["+", "-"]:
|
|
raise BFSyntaxError(
|
|
"Expected either + or - as unary prefix instead of token %s" % self.parser.current_token()
|
|
)
|
|
self.parser.advance_token()
|
|
unary_prefix = self.unary_prefix()
|
|
|
|
new_node = NodeUnaryPrefix(self.ids_map_list, operation=token, literal=unary_prefix)
|
|
return new_node
|
|
|
|
elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]:
|
|
self.parser.advance_token()
|
|
literal = self.literal()
|
|
|
|
new_node = NodeUnaryPrefix(self.ids_map_list, operation=token, literal=literal)
|
|
return new_node
|
|
|
|
else:
|
|
return self.unary_postfix()
|
|
|
|
def multiplicative(self):
|
|
# multiplicative: unary_prefix ((MUL|DIV|MOD) unary_prefix)*
|
|
|
|
n = self.unary_prefix()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.BINOP and token.data in ["*", "/", "%"]:
|
|
self.parser.advance_token()
|
|
next_factor = self.unary_prefix()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_factor)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def additive(self):
|
|
# additive: multiplicative ((PLUS|MINUS) multiplicative)*
|
|
|
|
n = self.multiplicative()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.BINOP and token.data in ["+", "-"]:
|
|
self.parser.advance_token()
|
|
next_term = self.multiplicative()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_term)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def shift(self):
|
|
# shift: additive (<<|>> additive)*
|
|
|
|
n = self.additive()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.BITWISE_SHIFT:
|
|
self.parser.advance_token()
|
|
next_additive = self.additive()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_additive)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def relational(self):
|
|
# relational: shift (==|!=|<|>|<=|>= shift)?
|
|
|
|
a = self.shift()
|
|
|
|
token = self.parser.current_token()
|
|
if token.type != Token.RELOP: # just an arithmetic expression
|
|
return a
|
|
|
|
self.parser.advance_token()
|
|
b = self.shift()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=a, right=b)
|
|
return new_node
|
|
|
|
def bitwise_and(self):
|
|
# bitwise_and: relational (& relational)*
|
|
|
|
n = self.relational()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.BITWISE_AND:
|
|
self.parser.advance_token()
|
|
next_relational = self.relational()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_relational)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def bitwise_xor(self):
|
|
# bitwise_xor: bitwise_and (| bitwise_and)*
|
|
|
|
n = self.bitwise_and()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.BITWISE_XOR:
|
|
self.parser.advance_token()
|
|
next_bitwise_and = self.bitwise_and()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_bitwise_and)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def bitwise_or(self):
|
|
# bitwise_or: bitwise_xor (| bitwise_xor)*
|
|
|
|
n = self.bitwise_xor()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.BITWISE_OR:
|
|
self.parser.advance_token()
|
|
next_bitwise_xor = self.bitwise_xor()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_bitwise_xor)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def logical_and(self):
|
|
# logical_and: bitwise_or (&& bitwise_or)*
|
|
|
|
n = self.bitwise_or()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.AND:
|
|
self.parser.advance_token()
|
|
next_bitwise_or = self.bitwise_or()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_bitwise_or)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def logical_or(self):
|
|
# logical_or: logical_and (|| logical_and)*
|
|
|
|
n = self.logical_and()
|
|
|
|
token = self.parser.current_token()
|
|
while token is not None and token.type == Token.OR:
|
|
self.parser.advance_token()
|
|
next_and = self.logical_and()
|
|
|
|
new_node = NodeToken(self.ids_map_list, token=token, left=n, right=next_and)
|
|
n = new_node
|
|
|
|
token = self.parser.current_token()
|
|
|
|
return n
|
|
|
|
def ternary_expression(self):
|
|
# ternary_expression: logical_or (? expression : ternary_expression)?
|
|
n = self.logical_or()
|
|
if self.parser.current_token().type != Token.TERNARY:
|
|
return n
|
|
|
|
self.parser.advance_token() # skip ?
|
|
node_true = self.expression()
|
|
self.parser.check_current_token_is(Token.COLON)
|
|
self.parser.advance_token() # skip :
|
|
node_false = self.ternary_expression()
|
|
return NodeTernary(self.ids_map_list, n, node_true, node_false)
|
|
|
|
def assignment(self):
|
|
# assignment: ID ASSIGN expression | ID ASSIGN ARRAY_INITIALIZATION | ID (LBRACK expression RBRACK)+ ASSIGN expression | ternary_expression
|
|
|
|
if self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.ASSIGN:
|
|
|
|
if self.parser.next_token(2).type in [Token.LBRACE, Token.STRING]: # ID ASSIGN ARRAY_INITIALIZATION
|
|
token_ID = self.parser.current_token()
|
|
self.parser.advance_token() # skip ID
|
|
variable_ID = get_variable_from_ID_token(self.ids_map_list, token_ID)
|
|
if not is_variable_array(variable_ID):
|
|
raise BFSemanticError("Trying to assign array to non-array variable %s" % token_ID)
|
|
return self.compile_array_assignment(token_ID)
|
|
|
|
# ID ASSIGN expression
|
|
id_token = self.parser.current_token()
|
|
assign_token = self.parser.next_token()
|
|
self.parser.advance_token(amount=2) # skip ID ASSIGN
|
|
|
|
expression_node = self.expression()
|
|
|
|
new_node = NodeToken(
|
|
self.ids_map_list,
|
|
left=NodeToken(self.ids_map_list, token=id_token),
|
|
token=assign_token,
|
|
right=expression_node,
|
|
)
|
|
return new_node
|
|
|
|
elif (
|
|
self.parser.current_token().type == Token.ID
|
|
and self.parser.next_token().type == Token.LBRACK
|
|
and self.get_token_after_array_access().type == Token.ASSIGN
|
|
):
|
|
# ID (LBRACK expression RBRACK)+ ASSIGN value_expression
|
|
id_token = self.parser.current_token()
|
|
index_expression = self.get_array_index_expression()
|
|
self.parser.check_current_token_is(Token.ASSIGN)
|
|
assign_token = self.parser.current_token()
|
|
self.parser.advance_token() # skip ASSIGN
|
|
value_expression = self.expression()
|
|
|
|
return NodeArraySetElement(self.ids_map_list, id_token, index_expression, assign_token, value_expression)
|
|
else:
|
|
return self.ternary_expression()
|
|
|
|
def expression(self):
|
|
# expression: assignment
|
|
return self.assignment()
|
|
|
|
def compile_expression(self):
|
|
# parses mathematical expressions (+-*/ ())
|
|
# increments/decrements (++, --)
|
|
# relative operations (==, !=, <, >, <=, >=)
|
|
# bitwise operations (|, &, ^, ~)
|
|
# logical operations (!, &&, ||, ~)
|
|
# ternary expression (?)
|
|
# assignment (=, +=, -=, *=, /=, %=, <<=, >>=, &=, |=, ^=)
|
|
# this is implemented using a Node class that represents a parse tree
|
|
|
|
"""
|
|
(used reference: https://introcs.cs.princeton.edu/java/11precedence/)
|
|
order of operations (lowest precedence to highest precedence)
|
|
assignment (=, +=, -=, *=, /=, %=, <<=, >>=, &=, |=, ^=)
|
|
ternary_expression (?)
|
|
logical_or (||)
|
|
logical_and (&&)
|
|
bitwise_or (|)
|
|
bitwise_xor (^)
|
|
bitwise_and (&)
|
|
bitwise_not (~)
|
|
relational (==|!=|<|>|<=|>=)
|
|
shift (<<|>>)
|
|
additive (+-)
|
|
multiplicative (*/%)
|
|
unary_prefix (!, ++, --, ~)
|
|
unary_postfix (++, --)
|
|
|
|
expression: assignment
|
|
assignment: ID (=|+=|-=|*=|/=|%=|<<=|>>=|&=|(|=)|^=) expression | ternary_expression
|
|
ternary_expression: logical_or (? expression : ternary_expression)?
|
|
logical_or: logical_and (|| logical_and)*
|
|
logical_and: bitwise_or (&& bitwise_or)*
|
|
bitwise_or: bitwise_xor (| bitwise_xor)*
|
|
bitwise_xor: bitwise_and (^ bitwise_and)*
|
|
bitwise_and: relational (& relational)*
|
|
relational: shift (==|!=|<|>|<=|>= shift)?
|
|
shift: additive ((<<|>>) additive)*
|
|
additive: multiplicative ((PLUS|MINUS) multiplicative)*
|
|
multiplicative: unary_prefix ((MUL|DIV|MOD) unary_prefix)*
|
|
unary_prefix: ( (!|+|-)* unary_prefix ) | ( ( ++ | -- | ~ ) literal ) | unary_postfix
|
|
unary_postfix: literal ( ++ | -- )?
|
|
literal: NUM | CHAR | ID | ID[expression] | TRUE | FALSE | function_call | ( expression )
|
|
"""
|
|
|
|
parse_tree = self.expression()
|
|
expression_code = parse_tree.get_code(self.current_stack_pointer())
|
|
return expression_code
|
|
|
|
# functions-related
|
|
def get_function_parameters_declaration(self):
|
|
# parameters declaration: LPAREN (int ID (LBRACK NUM RBRACK)? (COMMA int ID)*)? RPAREN
|
|
# return list of parameters (named tuples (type, ID)) at the same order as declared
|
|
|
|
assert self.parser.current_token().type == Token.LPAREN
|
|
self.parser.advance_token()
|
|
|
|
res = list()
|
|
|
|
token = self.parser.current_token()
|
|
while token.type != Token.RPAREN:
|
|
if token.type != Token.INT:
|
|
raise BFSemanticError("Only int type is supported as a function parameter, and not '%s'" % str(token))
|
|
|
|
parameter = create_variable_from_definition(self.parser, advance_tokens=True)
|
|
res.append(parameter)
|
|
|
|
if self.parser.current_token().type == Token.COMMA:
|
|
self.parser.advance_token()
|
|
else:
|
|
self.parser.check_current_token_is(Token.RPAREN)
|
|
|
|
token = self.parser.current_token()
|
|
|
|
self.parser.advance_token() # skip RPAREN
|
|
return res
|
|
|
|
def compile_expression_list(self):
|
|
# expression_list: ( expression (COMMA expression)* )?
|
|
# returns a list of Nodes - one node for each expression
|
|
assert self.parser.current_token().type == Token.LPAREN
|
|
self.parser.advance_token()
|
|
|
|
expressions = list()
|
|
|
|
token = self.parser.current_token()
|
|
while token.type != Token.RPAREN:
|
|
expressions.append(self.expression())
|
|
|
|
if self.parser.current_token().type == Token.COMMA:
|
|
self.parser.advance_token()
|
|
else:
|
|
self.parser.check_current_token_is(Token.RPAREN)
|
|
token = self.parser.current_token()
|
|
|
|
self.parser.advance_token() # skip RPAREN
|
|
return expressions
|
|
|
|
def compile_return(self):
|
|
# this assumes that the return is the last statement in the function
|
|
|
|
self.parser.advance_token() # skip return
|
|
if self.parser.current_token().type == Token.SEMICOLON:
|
|
# return;
|
|
self.parser.advance_token() # skip ;
|
|
return "" # nothing to do
|
|
|
|
# return exp;
|
|
expression_code = self.compile_expression()
|
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
|
|
|
self.parser.advance_token() # skip ;
|
|
|
|
code = expression_code # after this, we point to next available cell
|
|
code += "<" # point to value to return
|
|
code += get_move_to_return_value_cell_code(self.return_value_cell, self.current_stack_pointer())
|
|
|
|
return code
|
|
|
|
# statements
|
|
def compile_expression_as_statement(self):
|
|
# this expression can be used as a statement.
|
|
# e.g: x+=5; or x++ or ++x;
|
|
|
|
assert self.parser.current_token().type in [
|
|
Token.ID,
|
|
Token.INCREMENT,
|
|
Token.DECREMENT,
|
|
Token.UNARY_MULTIPLICATIVE,
|
|
]
|
|
|
|
code = self.compile_expression()
|
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
|
self.parser.advance_token() # skip ;
|
|
|
|
code += "<" # discard the expression's value
|
|
|
|
return code
|
|
|
|
def compile_print_string(self):
|
|
# print(string);
|
|
self.parser.check_next_tokens_are([Token.LPAREN, Token.STRING, Token.RPAREN, Token.SEMICOLON])
|
|
self.parser.advance_token(amount=2) # skip print (
|
|
string_to_print = self.parser.current_token().data
|
|
self.parser.advance_token(amount=3) # skip string ) ;
|
|
|
|
code = get_print_string_code(string_to_print)
|
|
return code
|
|
|
|
def compile_function_call_statement(self):
|
|
# compile statement: function_call SEMICOLON
|
|
function_call_node = self.function_call()
|
|
function_call_code = function_call_node.get_code(current_pointer=self.current_stack_pointer())
|
|
|
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
|
self.parser.advance_token() # skip ;
|
|
|
|
code = function_call_code # at this point, we point to one after the return value
|
|
code += "<" # discard return value
|
|
return code
|
|
|
|
def compile_if(self):
|
|
# if (expression) statement (else statement)? note - statement can be scope { }
|
|
|
|
self.parser.check_next_token_is(Token.LPAREN)
|
|
self.parser.advance_token(amount=2) # skip to after LPAREN
|
|
|
|
expression_code = self.compile_expression()
|
|
self.parser.check_current_token_is(Token.RPAREN)
|
|
self.parser.advance_token() # point to after RPAREN
|
|
|
|
# if ... (else ...)?
|
|
# need to use 2 temp cells
|
|
# expression, execute_else
|
|
|
|
self.increase_stack_pointer(amount=2)
|
|
inside_if_code = self.compile_statement()
|
|
|
|
have_else = self.parser.current_token().type == Token.ELSE
|
|
if have_else:
|
|
self.parser.advance_token() # skip the 'else'
|
|
inside_else_code = self.compile_statement()
|
|
self.decrease_stack_pointer(amount=2)
|
|
|
|
code = expression_code # evaluate expression. after this we point to "execute_else" cell
|
|
if have_else:
|
|
code += "[-]+" # execute_else = 1
|
|
code += "<" # point to the expression
|
|
code += "[" # if it is non-zero
|
|
code += ">" # point to execute_else
|
|
if have_else:
|
|
code += "-" # execute_else = 0
|
|
code += ">" # point to next available cell
|
|
code += inside_if_code # after this we point to the same cell (one after execute_else)
|
|
code += "<<" # point to expression
|
|
code += "[-]" # expression = 0
|
|
code += "]" # end if
|
|
# now we point to next available cell (what used to be expression_code)
|
|
|
|
if have_else:
|
|
code += ">" # point to execute_else
|
|
code += "[" # if it is non-zero
|
|
code += ">" # point to next available cell
|
|
code += inside_else_code # after this we point to the same cell (one after execute_else)
|
|
code += "<" # point to execute_else
|
|
code += "-" # execute_else = 0
|
|
code += "]" # end if
|
|
code += "<" # point to next available cell (what used to be expression_code)
|
|
|
|
return code
|
|
|
|
def compile_while(self): # while (expression) statement note - statement can be scope { }
|
|
self.parser.check_next_token_is(Token.LPAREN)
|
|
self.parser.advance_token(amount=2) # skip to after LPAREN
|
|
|
|
expression_code = self.compile_expression()
|
|
|
|
self.parser.check_current_token_is(Token.RPAREN)
|
|
self.parser.advance_token() # point to after RPAREN
|
|
|
|
inner_scope_code = self.compile_statement()
|
|
|
|
code = expression_code # evaluate expression
|
|
code += "<" # point to the expression
|
|
code += "[" # if it is 0, jump to after the <while> scope
|
|
code += inner_scope_code # <while> scope code. after this code, pointer points to the next available cell. i.e one after the expression
|
|
code += expression_code # re-evaluate the expression
|
|
code += "<" # point to the expression
|
|
code += "]" # after <while> scope
|
|
|
|
return code
|
|
|
|
def compile_do_while(self): # do statement while (expression) semicolon note - statement can be scope { }
|
|
self.parser.check_current_token_is(Token.DO)
|
|
self.parser.advance_token()
|
|
|
|
inner_scope_code = self.compile_statement()
|
|
|
|
self.parser.check_current_tokens_are([Token.WHILE, Token.LPAREN])
|
|
self.parser.advance_token(amount=2) # point to after LPAREN
|
|
|
|
expression_code = self.compile_expression()
|
|
|
|
self.parser.check_current_tokens_are([Token.RPAREN, Token.SEMICOLON])
|
|
self.parser.advance_token(amount=2) # point to after SEMICOLON
|
|
|
|
code = "[-]+" # set expression to 1. since do while loops executes the scope code first.
|
|
code += "[" # go in scope
|
|
code += inner_scope_code # <do-while> scope code. after this code, pointer points to the same cell. i.e the expression
|
|
code += expression_code # evaluate the expression, after this code, the pointer is pointing to the next cell
|
|
code += "<" # point to the expression
|
|
code += "]" # after <do-while> scope
|
|
|
|
return code
|
|
|
|
def compile_switch(self): # switch (expression) { ((default | case literal): statements* break;? statements*)* }
|
|
self.parser.check_current_tokens_are([Token.SWITCH, Token.LPAREN])
|
|
self.parser.advance_token(amount=2) # point to after LPAREN
|
|
|
|
self.increase_stack_pointer() # use 1 temp cell before evaluating the expression
|
|
expression_code = self.compile_expression()
|
|
self.parser.check_current_tokens_are([Token.RPAREN, Token.LBRACE])
|
|
self.parser.advance_token(amount=2) # point to after LBRACE
|
|
|
|
self.increase_stack_pointer() # use 1 additional temp cell for indicating we need to execute a case
|
|
cases = list() # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool))
|
|
|
|
while self.parser.current_token().type in [
|
|
Token.CASE,
|
|
Token.DEFAULT,
|
|
]: # (default | CASE literal) COLON statement* break;? statements*
|
|
if self.parser.current_token().type == Token.CASE:
|
|
self.parser.advance_token() # skip CASE
|
|
constant_value_token = self.parser.current_token()
|
|
if not is_token_literal(constant_value_token):
|
|
raise BFSemanticError("Switch case value is not a literal. Token is %s" % constant_value_token)
|
|
|
|
value = get_literal_token_value(constant_value_token)
|
|
if value in [case for (case, _, _) in cases]:
|
|
raise BFSemanticError("Case %d already exists. Token is %s" % (value, constant_value_token))
|
|
else:
|
|
assert self.parser.current_token().type == Token.DEFAULT
|
|
value = "default"
|
|
if value in [case for (case, _, _) in cases]:
|
|
raise BFSemanticError("default case %s already exists." % self.parser.current_token())
|
|
|
|
self.parser.check_next_token_is(Token.COLON)
|
|
self.parser.advance_token(amount=2) # point to after COLON
|
|
|
|
inner_case_code = ""
|
|
while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]:
|
|
inner_case_code += self.compile_statement(
|
|
allow_declaration=False
|
|
) # not allowed to declare variables directly inside case
|
|
|
|
has_break = False
|
|
if self.parser.current_token().type == Token.BREAK: # ignore all statements after break
|
|
self.parser.check_next_token_is(Token.SEMICOLON)
|
|
self.parser.advance_token(amount=2) # skip break SEMICOLON
|
|
has_break = True
|
|
while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
|
|
self.compile_statement() # advance the parser and discard the code
|
|
cases.append((value, inner_case_code, has_break))
|
|
|
|
if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
|
|
raise BFSyntaxError(
|
|
"Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token()
|
|
)
|
|
self.parser.check_current_token_is(Token.RBRACE)
|
|
self.parser.advance_token()
|
|
self.decrease_stack_pointer(amount=2)
|
|
|
|
return process_switch_cases(expression_code, cases)
|
|
|
|
def compile_break(self):
|
|
# TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while
|
|
raise NotImplementedError(
|
|
"Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s"
|
|
% self.parser.current_token()
|
|
)
|
|
|
|
def compile_for(self):
|
|
# for (statement expression; expression) inner_scope_code note: statement contains ;, and inner_scope_code can be scope { }
|
|
# (the statement/second expression/inner_scope_code can be empty)
|
|
# (the statement cannot contain scope - { and } )
|
|
|
|
"""
|
|
<for> is a special case of scope
|
|
the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
|
|
so we manually compile the scope instead of using self.compile_scope():
|
|
|
|
we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
|
|
we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
|
|
(this needs to be done before the <for> definition's statement)
|
|
next, inside the for's scope {}:
|
|
after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
|
|
after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
|
|
finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
|
|
"""
|
|
|
|
self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN])
|
|
self.parser.advance_token(amount=2) # skip for (
|
|
|
|
manually_inserted_variable_in_for_definition = False
|
|
variable = None
|
|
code = ""
|
|
|
|
# =============== enter FOR scope ===============
|
|
self.add_ids_map()
|
|
# ===============================================
|
|
|
|
if self.parser.current_token().type == Token.INT:
|
|
# we are defining a variable inside the for statement definition (for (int i = 0....))
|
|
variable = create_variable_from_definition(self.parser, advance_tokens=False)
|
|
self.insert_to_ids_map(variable)
|
|
manually_inserted_variable_in_for_definition = True
|
|
code += ">" * get_variable_size(variable)
|
|
|
|
show_side_effect_warning = self.parser.next_token(2).type != Token.ASSIGN
|
|
if self.parser.next_token(2).type == Token.LBRACK:
|
|
show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN
|
|
|
|
if show_side_effect_warning:
|
|
print(
|
|
"[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects"
|
|
% self.parser.next_token()
|
|
)
|
|
|
|
if self.parser.current_token().type == Token.LBRACE: # statement is a scope
|
|
raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token())
|
|
initial_statement = self.compile_statement()
|
|
|
|
condition_expression = self.compile_expression()
|
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
|
self.parser.advance_token() # skip ;
|
|
|
|
if self.parser.current_token().type == Token.RPAREN:
|
|
modification_expression = "" # no modification expression
|
|
else:
|
|
modification_expression = self.compile_expression()
|
|
modification_expression += "<" # discard expression value
|
|
self.parser.check_current_token_is(Token.RPAREN)
|
|
self.parser.advance_token() # skip )
|
|
|
|
inner_scope_code = ""
|
|
if self.parser.current_token().type == Token.LBRACE: # do we have {} as for's statement?
|
|
# compiling <for> scope inside { }:
|
|
if manually_inserted_variable_in_for_definition:
|
|
inner_scope_code += "<" * get_variable_size(variable)
|
|
inner_scope_code += self.insert_scope_variables_into_ids_map()
|
|
inner_scope_code += self.compile_scope_statements()
|
|
else:
|
|
inner_scope_code += self.compile_statement()
|
|
# =============== exit FOR scope ===============
|
|
inner_scope_code += self.exit_scope()
|
|
if manually_inserted_variable_in_for_definition:
|
|
inner_scope_code += ">" * get_variable_size(variable)
|
|
# ==============================================
|
|
|
|
code += initial_statement
|
|
code += condition_expression # evaluate expression
|
|
code += "<" # point to the expression
|
|
code += "[" # if it is 0, jump to after the <for> scope
|
|
code += inner_scope_code # <for> scope code
|
|
code += modification_expression
|
|
code += condition_expression # re-evaluate the expression
|
|
code += "<" # point to the expression
|
|
code += "]" # after <for> scope
|
|
|
|
if manually_inserted_variable_in_for_definition:
|
|
code += "<" * get_variable_size(variable)
|
|
|
|
return code
|
|
|
|
def compile_statement(self, allow_declaration=True):
|
|
# returns code that performs the current statement
|
|
# at the end, the pointer points to the same location it pointed before the statement was executed
|
|
|
|
token = self.parser.current_token()
|
|
if token.type == Token.INT: # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON
|
|
if not allow_declaration:
|
|
raise BFSemanticError(
|
|
"Cannot define variable (%s) directly inside case. "
|
|
"Can define inside new scope {} or outside the switch statement" % token
|
|
)
|
|
return self.compile_variable_declaration()
|
|
|
|
elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: # ++ID;
|
|
return self.compile_expression_as_statement()
|
|
|
|
elif token.type == Token.ID:
|
|
if self.parser.next_token().type in [
|
|
Token.ASSIGN,
|
|
Token.LBRACK,
|
|
Token.INCREMENT,
|
|
Token.DECREMENT,
|
|
Token.UNARY_MULTIPLICATIVE,
|
|
]:
|
|
# ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++;
|
|
return self.compile_expression_as_statement()
|
|
elif self.parser.next_token().type == Token.LPAREN: # ID(...); (function call)
|
|
return self.compile_function_call_statement()
|
|
raise BFSyntaxError(
|
|
"Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)"
|
|
% (str(self.parser.next_token()), str(token))
|
|
)
|
|
|
|
elif token.type == Token.PRINT:
|
|
return self.compile_print_string()
|
|
|
|
elif token.type == Token.IF:
|
|
return self.compile_if()
|
|
|
|
elif token.type == Token.LBRACE:
|
|
return self.compile_scope()
|
|
|
|
elif token.type == Token.WHILE:
|
|
return self.compile_while()
|
|
|
|
elif token.type == Token.DO:
|
|
return self.compile_do_while()
|
|
|
|
elif token.type == Token.SWITCH:
|
|
return self.compile_switch()
|
|
|
|
elif token.type == Token.BREAK:
|
|
return self.compile_break()
|
|
|
|
elif token.type == Token.RETURN:
|
|
return self.compile_return()
|
|
|
|
elif token.type == Token.FOR:
|
|
return self.compile_for()
|
|
|
|
elif token.type == Token.SEMICOLON:
|
|
# empty statement
|
|
self.parser.advance_token() # skip ;
|
|
return ""
|
|
|
|
elif token.type in [Token.CASE, Token.DEFAULT]:
|
|
raise BFSyntaxError("%s not inside a switch statement" % token)
|
|
|
|
raise BFSyntaxError("Invalid statement at " + str(token))
|
|
|
|
def compile_scope_statements(self):
|
|
tokens = self.tokens
|
|
|
|
code = ""
|
|
while self.parser.current_token() is not None:
|
|
if self.parser.current_token().type == Token.RBRACE:
|
|
# we reached the end of our scope
|
|
self.parser.advance_token() # skip RBRACE
|
|
return code
|
|
else:
|
|
code += self.compile_statement()
|
|
|
|
# should never get here
|
|
raise BFSyntaxError("expected } after the last token in scope " + str(tokens[-1]))
|
|
|
|
def compile_scope(self):
|
|
assert self.parser.current_token().type == Token.LBRACE
|
|
|
|
code = self.enter_scope()
|
|
code += self.compile_scope_statements()
|
|
code += self.exit_scope()
|
|
|
|
return code
|
|
|
|
def compile_function_scope(self, parameters):
|
|
# returns code for the current function
|
|
# parameters is a list of parameters, in the order of their declaration
|
|
# will be inserted into the new scope prior to the scope's compilation
|
|
|
|
"""
|
|
example layout:
|
|
int global_var1;
|
|
int global_var2;
|
|
int foo(int a, int b) {
|
|
int x;
|
|
int y;
|
|
return 5;
|
|
}
|
|
|
|
int main() {
|
|
int n;
|
|
foo(1, 2);
|
|
}
|
|
|
|
global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
|
|
|
|
calling convention:
|
|
caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell
|
|
callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters)
|
|
can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1)
|
|
can assume that the next cells match your parameters
|
|
assumes that initially, the pointer points to the first cell (return_value_cell).
|
|
therefore begin with '>' * (1 + parameters + scope variables)
|
|
"""
|
|
|
|
assert self.parser.current_token().type == Token.LBRACE
|
|
|
|
code = self.enter_function_scope(parameters)
|
|
code += self.compile_scope_statements()
|
|
code += self.exit_scope()
|
|
code += "<" # point to return_value_cell
|
|
|
|
return code
|