reasoning-gym/reasoning_gym/code/contrib/bfit/Compiler/Compiler.py
Andreas Koepf ebb88e6c6a lint
2025-01-30 22:55:04 +01:00

193 lines
8.7 KiB
Python

#!/usr/bin/env python3
from .Exceptions import BFSemanticError, BFSyntaxError
from .FunctionCompiler import FunctionCompiler
from .Functions import check_function_exists, get_function_object, insert_function_object
from .General import get_literal_token_code, is_token_literal, unpack_literal_tokens_to_array_dimensions
from .Globals import (
create_variable_from_definition,
get_global_variables_size,
get_variable_dimensions,
get_variable_size,
insert_global_variable,
)
from .Lexical_analyzer import analyze
from .LibraryFunctionCompiler import insert_library_functions
from .Optimizer import optimize
from .Parser import Parser
from .Token import Token
"""
This module creates FunctionCompiler objects and global variable objects,
and finally returns the code of the main function.
"""
class Compiler:
    """Driver that turns C-like source code into Brainfuck code.

    The source is tokenized once in the constructor. ``compile`` then walks
    the top-level definitions, registering each function and global variable,
    and finally emits the code of the ``main`` function.
    """

    def __init__(self, code, optimize_code=False):
        """Tokenize ``code`` and wrap the tokens in a ``Parser``.

        :param code: C-like code (string)
        :param optimize_code: when true, ``optimize`` is run on the token list
            before parsing (its return value is not used here)
        """
        tokens = analyze(code)
        if optimize_code:
            optimize(tokens)
        self.parser = Parser(tokens)

    # global variables and functions
    def create_function_object(self):
        """Build a ``FunctionCompiler`` for the function at the current token.

        Grammar: (INT | VOID) ID LPAREN expression_list RPAREN LBRACE statements RBRACE

        The parser is advanced to the token right after the function's
        closing RBRACE.

        :return: FunctionCompiler holding the function's name and all of its tokens
        :raises BFSemanticError: if the return type is neither void nor int
        """
        if self.parser.current_token().type not in [Token.VOID, Token.INT]:
            raise BFSemanticError(
                "Function return type can be either void or int, not '%s'" % str(self.parser.current_token())
            )

        self.parser.check_next_tokens_are([Token.ID, Token.LPAREN])

        # save all tokens of this function
        function_name = self.parser.next_token(next_amount=1).data
        # current token is the return type, +1 is the ID, +2 is the LPAREN
        RPAREN_index = self.parser.find_matching(
            starting_index=self.parser.current_token_index + 2
        )  # first find RPAREN
        self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index)
        RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index + 1)  # then find RBRACE

        # take all tokens between INT and RBRACE and pass them to function object
        function_tokens = self.parser.tokens[self.parser.current_token_index : RBRACE_index + 1]
        # skip function definition
        self.parser.advance_to_token_at_index(RBRACE_index + 1)

        return FunctionCompiler(function_name, function_tokens)

    def compile_global_variable_definition(self):
        """Compile one global variable definition starting at the current token.

        Grammar: INT ID (ASSIGN NUM | (LBRACK NUM RBRACK)+ (ASSIGN LBRACE ... RBRACE)?)? SEMICOLON

        The variable is registered as a global, and the parser is advanced
        past the terminating SEMICOLON.

        :return: code that initializes this variable, and advances pointer
            according to variable size
        :raises BFSyntaxError: on an unexpected token in the definition
        :raises BFSemanticError: if a scalar initializer is not a literal
        """
        self.parser.check_current_tokens_are([Token.INT, Token.ID])
        ID_token = self.parser.next_token()
        variable = create_variable_from_definition(self.parser, advance_tokens=True)
        insert_global_variable(variable)

        # if this is set to False, then the compiler assumes that initially all cells are set to zero
        # if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code)
        ZERO_CELLS_BEFORE_USE = False

        code = "[-]" if ZERO_CELLS_BEFORE_USE else ""

        if get_variable_size(variable) > 1:  # its an array
            if self.parser.current_token().type == Token.SEMICOLON:
                # array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON
                self.parser.advance_token()  # skip SEMICOLON
                code = (code + ">") * get_variable_size(variable)  # advance to after this variable
                return code

            if self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=":
                # array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON
                self.parser.advance_token()  # skip ASSIGN

                if self.parser.current_token().type not in [Token.LBRACE, Token.STRING]:
                    raise BFSyntaxError("Expected LBRACE or STRING at '%s'" % self.parser.current_token())

                literal_tokens_list = self.parser.compile_array_initialization_list()
                self.parser.check_current_token_is(Token.SEMICOLON)
                self.parser.advance_token()  # skip SEMICOLON

                array_dimensions = get_variable_dimensions(variable)
                unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
                    ID_token, array_dimensions, literal_tokens_list
                )

                for literal in unpacked_literals_list:
                    code += get_literal_token_code(literal)  # evaluate this literal and point to next array element
                return code

            raise BFSyntaxError(
                "Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)"
                % self.parser.current_token()
            )

        if self.parser.current_token().type == Token.SEMICOLON:  # no need to initialize
            self.parser.advance_token()  # skip SEMICOLON
            code += ">"  # advance to after this variable
        else:
            self.parser.check_current_token_is(Token.ASSIGN)
            # the ASSIGN token type is checked above; only a plain "=" is accepted here
            if self.parser.current_token().data != "=":
                raise BFSyntaxError(
                    "Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token()
                )
            self.parser.advance_token()  # skip ASSIGN

            if not is_token_literal(self.parser.current_token()):
                raise BFSemanticError(
                    "Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token())
                )

            code += get_literal_token_code(self.parser.current_token())

            self.parser.check_next_token_is(Token.SEMICOLON)
            self.parser.advance_token(amount=2)  # skip (NUM|CHAR|TRUE|FALSE) SEMICOLON

        return code

    def process_global_definitions(self):
        """
        Iterate through all tokens
        When encountering function definition - create Function object and pass it the function's tokens
        When encountering global variable definition - create Variable object
        Returns code that initializes global variables and advances the pointer to after them

        :raises BFSyntaxError: on a malformed top-level definition, or if tokens
            remain after the last recognized definition
        """
        code = ""
        token = self.parser.current_token()
        while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]:
            if token.type == Token.SEMICOLON:  # can have random semicolons ;)
                self.parser.advance_token()
                token = self.parser.current_token()
                continue

            self.parser.check_next_token_is(Token.ID)

            # the token two positions ahead decides: function or global variable
            lookahead = self.parser.next_token(next_amount=2)
            if lookahead.type == Token.LPAREN:
                function = self.create_function_object()
                insert_function_object(function)
            # compare by value (==), like every other token-type check in this
            # class; `is` would only work through interpreter object caching
            elif token.type == Token.INT and lookahead.type in [
                Token.SEMICOLON,
                Token.ASSIGN,
                Token.LBRACK,
            ]:
                code += self.compile_global_variable_definition()
            else:
                raise BFSyntaxError(
                    "Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)"
                    % (str(lookahead), str(self.parser.next_token()))
                )

            token = self.parser.current_token()

        if self.parser.current_token() is not None:  # we have not reached the last token
            untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index :]]
            raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens)

        return code

    def compile(self):
        """Compile the whole program and return the resulting Brainfuck code.

        Library functions are registered first, then all global definitions
        are processed, and finally the code of ``main`` is appended.

        :return: Brainfuck code (string)
        """
        insert_library_functions()
        code = (
            self.process_global_definitions()
        )  # code that initializes global variables and advances pointer to after them

        check_function_exists(Token(Token.ID, 0, 0, "main"), 0)
        code += get_function_object("main").get_code(get_global_variables_size())
        code += "<" * get_global_variables_size()  # point to the first cell to end the program nicely :)
        return code
def compile(code, optimize_code=False):
    """
    Convenience entry point: build a Compiler for the given source and run it.

    :param code: C-like code (string)
    :param optimize_code: syntax optimization (bool)
    :return code: Brainfuck code (string)
    """
    # NOTE: this name shadows the built-in compile() inside this module.
    return Compiler(code, optimize_code).compile()
if __name__ == "__main__":
    # Usage hint, emitted as three lines in a single print call.
    print(
        "This file cannot be directly run",
        "Please import it and use the 'compile' function",
        "Which receives a C-like code (string) and returns Brainfuck code (string)",
        sep="\n",
    )