mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
Restructure {reasoning_gym, tests}/{core, exercises, curricula}
This commit is contained in:
parent
8b0f634f4c
commit
10dbb374b0
110 changed files with 0 additions and 0 deletions
|
|
@ -1,193 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
from .Exceptions import BFSemanticError, BFSyntaxError
|
||||
from .FunctionCompiler import FunctionCompiler
|
||||
from .Functions import check_function_exists, get_function_object, insert_function_object
|
||||
from .General import get_literal_token_code, is_token_literal, unpack_literal_tokens_to_array_dimensions
|
||||
from .Globals import (
|
||||
create_variable_from_definition,
|
||||
get_global_variables_size,
|
||||
get_variable_dimensions,
|
||||
get_variable_size,
|
||||
insert_global_variable,
|
||||
)
|
||||
from .Lexical_analyzer import analyze
|
||||
from .LibraryFunctionCompiler import insert_library_functions
|
||||
from .Optimizer import optimize
|
||||
from .Parser import Parser
|
||||
from .Token import Token
|
||||
|
||||
"""
|
||||
This file is responsible for creating FunctionCompiler objects and global variables objects
|
||||
And finally, return the code of the main function
|
||||
"""
|
||||
|
||||
|
||||
class Compiler:
    """Top-level driver that turns a C-like token stream into Brainfuck code.

    Responsibilities (per the module docstring): create FunctionCompiler
    objects for each function definition, create global-variable objects for
    each global definition, and finally return the code of the main function.
    """

    def __init__(self, code, optimize_code=False):
        """Tokenize *code*; optionally run the token-level optimizer first.

        :param code: C-like source code (string)
        :param optimize_code: when True, constant-fold tokens before parsing
        """
        tokens = analyze(code)
        if optimize_code:
            optimize(tokens)
        self.parser = Parser(tokens)

    # global variables and functions
    def create_function_object(self):
        # function: (INT | VOID) ID LPAREN expression_list RPAREN LBRACE statements RBRACE
        # returns function named tuple
        #
        # Does not compile the function body here - it only slices out the
        # function's tokens and hands them to a FunctionCompiler object.

        if self.parser.current_token().type not in [Token.VOID, Token.INT]:
            raise BFSemanticError(
                "Function return type can be either void or int, not '%s'" % str(self.parser.current_token())
            )

        self.parser.check_next_tokens_are([Token.ID, Token.LPAREN])

        # save all tokens of this function
        function_name = self.parser.next_token(next_amount=1).data
        RPAREN_index = self.parser.find_matching(
            starting_index=self.parser.current_token_index + 2
        )  # first find RPAREN
        self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index)
        RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index + 1)  # then find RBRACE

        # take all tokens between INT and RBRACE and pass them to function object
        function_tokens = self.parser.tokens[self.parser.current_token_index : RBRACE_index + 1]
        # skip function definition
        self.parser.advance_to_token_at_index(RBRACE_index + 1)

        function = FunctionCompiler(function_name, function_tokens)
        return function

    def compile_global_variable_definition(self):
        # INT ID (ASSIGN NUM | (LBRACK NUM RBRACK)+ (ASSIGN LBRACE ... RBRACE)?)? SEMICOLON
        # returns code that initializes this variable, and advances pointer according to variable size

        self.parser.check_current_tokens_are([Token.INT, Token.ID])
        ID_token = self.parser.next_token()
        variable = create_variable_from_definition(self.parser, advance_tokens=True)
        insert_global_variable(variable)

        # if this is set to False, then the compiler assumes that initially all cells are set to zero
        # if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code)
        ZERO_CELLS_BEFORE_USE = False

        code = "[-]" if ZERO_CELLS_BEFORE_USE else ""
        if get_variable_size(variable) > 1:  # its an array
            if self.parser.current_token().type == Token.SEMICOLON:
                # array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON
                self.parser.advance_token()  # skip SEMICOLON
                # repeat the (optional) zeroing + one ">" once per array cell
                code = (code + ">") * get_variable_size(variable)  # advance to after this variable
                return code
            elif self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=":
                # array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON
                self.parser.advance_token()  # skip ASSIGN

                if self.parser.current_token().type not in [Token.LBRACE, Token.STRING]:
                    raise BFSyntaxError("Expected LBRACE or STRING at '%s'" % self.parser.current_token())

                literal_tokens_list = self.parser.compile_array_initialization_list()
                self.parser.check_current_token_is(Token.SEMICOLON)
                self.parser.advance_token()  # skip SEMICOLON

                # flatten the (possibly nested) initializer to one literal per cell
                array_dimensions = get_variable_dimensions(variable)
                unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
                    ID_token, array_dimensions, literal_tokens_list
                )

                for literal in unpacked_literals_list:
                    code += get_literal_token_code(literal)  # evaluate this literal and point to next array element
                return code
            else:
                raise BFSyntaxError(
                    "Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)"
                    % self.parser.current_token()
                )

        elif self.parser.current_token().type == Token.SEMICOLON:  # no need to initialize
            self.parser.advance_token()  # skip SEMICOLON
            code += ">"  # advance to after this variable
        else:
            self.parser.check_current_token_is(Token.ASSIGN)
            if self.parser.current_token().data != "=":
                raise BFSyntaxError(
                    "Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token()
                )
            self.parser.advance_token()  # skip ASSIGN

            if not is_token_literal(self.parser.current_token()):
                raise BFSemanticError(
                    "Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token())
                )

            code += get_literal_token_code(self.parser.current_token())

            self.parser.check_next_token_is(Token.SEMICOLON)
            self.parser.advance_token(amount=2)  # skip (NUM|CHAR|TRUE|FALSE) SEMICOLON

        return code

    def process_global_definitions(self):
        """
        Iterate through all tokens
        When encountering function definition - create Function object and pass it the function's tokens
        When encountering global variable definition - create Variable object
        Returns code that initializes global variables and advances the pointer to after them
        """
        code = ""
        token = self.parser.current_token()
        while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]:
            if token.type == Token.SEMICOLON:  # can have random semicolons ;)
                self.parser.advance_token()
                token = self.parser.current_token()
                continue
            self.parser.check_next_token_is(Token.ID)

            # the token after the ID decides what this definition is:
            # '(' -> function, ';'/'='/'[' -> global variable
            if self.parser.next_token(next_amount=2).type == Token.LPAREN:
                function = self.create_function_object()
                insert_function_object(function)
            elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [
                Token.SEMICOLON,
                Token.ASSIGN,
                Token.LBRACK,
            ]:
                code += self.compile_global_variable_definition()
            else:
                raise BFSyntaxError(
                    "Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)"
                    % (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token()))
                )

            token = self.parser.current_token()

        if self.parser.current_token() is not None:  # we have not reached the last token
            untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index :]]
            raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens)

        return code

    def compile(self):
        """Compile the whole program and return the Brainfuck code of main().

        Registers the built-in library functions, processes all global
        definitions, then emits main()'s code after the globals region.
        """
        insert_library_functions()
        code = (
            self.process_global_definitions()
        )  # code that initializes global variables and advances pointer to after them

        # a zero-parameter main() must exist; its code runs after the globals
        check_function_exists(Token(Token.ID, 0, 0, "main"), 0)
        code += get_function_object("main").get_code(get_global_variables_size())
        code += "<" * get_global_variables_size()  # point to the first cell to end the program nicely :)
        return code
|
||||
|
||||
|
||||
def compile(code, optimize_code=False):
    """Compile C-like source code into Brainfuck code.

    Note: intentionally shadows the builtin ``compile`` within this module.

    :param code: C-like code (string)
    :param optimize_code: syntax optimization (bool)
    :return code: Brainfuck code (string)
    """
    return Compiler(code, optimize_code).compile()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Running this module directly is not supported; it only exposes compile().
    for message in (
        "This file cannot be directly run",
        "Please import it and use the 'compile' function",
        "Which receives a C-like code (string) and returns Brainfuck code (string)",
    ):
        print(message)
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
class BFSyntaxError(Exception):
    """Raised when the source code violates the expected token grammar."""
|
||||
|
||||
|
||||
class BFSemanticError(Exception):
    """Raised when syntactically valid code has an invalid meaning."""
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,37 +0,0 @@
|
|||
from copy import deepcopy
|
||||
|
||||
from .Exceptions import BFSemanticError
|
||||
|
||||
# Global registry of compiled functions; written by insert_function_object()
# and read (as deep copies) by get_function_object().
functions = dict()  # Global dictionary of function_name --> FunctionCompiler objects
|
||||
|
||||
|
||||
def insert_function_object(function):
    """Register *function* in the global function table, keyed by its name."""
    functions.update({function.name: function})
|
||||
|
||||
|
||||
def get_function_object(name):
    """Return a fresh deep copy of the registered function named *name*.

    A copy (not the shared object) is required because functions may be
    compiled recursively, e.g.::

        int increase(int n) { return n+1;}
        int main() {int x = increase(increase(1));}

    While compiling the first call we start compiling the same function for
    the second call; working on distinct copies keeps per-compilation state
    (such as the current token pointer) from interfering.
    """
    return deepcopy(functions[name])
|
||||
|
||||
|
||||
def check_function_exists(function_token, parameters_amount):
    """Validate that the function named by *function_token* is registered and
    declares exactly *parameters_amount* parameters.

    Raises BFSemanticError on an unknown name or a parameter-count mismatch.
    """
    name = function_token.data
    if name not in functions:
        raise BFSemanticError("Function '%s' is undefined" % str(function_token))

    declared_amount = len(functions[name].parameters)
    if declared_amount != parameters_amount:
        raise BFSemanticError(
            "Function '%s' has %s parameters (called it with %s parameters)"
            % (str(function_token), declared_amount, parameters_amount)
        )
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,83 +0,0 @@
|
|||
from collections import namedtuple
|
||||
|
||||
from .General import dimensions_to_size, get_NUM_token_value
|
||||
from .Token import Token
|
||||
|
||||
"""
|
||||
This file holds the program's functions and global variables
|
||||
(as global variables, hehe)
|
||||
And related functions
|
||||
"""
|
||||
|
||||
# All program-level (global) variables, in definition order; appended to by
# insert_global_variable() and summed over by get_global_variables_size().
global_variables = list()  # Global list of global variables
|
||||
|
||||
|
||||
# variables
|
||||
def get_global_variables():
    """Return the (mutable, shared) list of all global variables defined so far."""
    return global_variables
|
||||
|
||||
|
||||
def insert_global_variable(variable):
    """Append *variable* to the module-wide global-variables list."""
    global_variables.append(variable)
|
||||
|
||||
|
||||
def get_global_variables_size():
    """Return the total number of cells occupied by all global variables."""
    total = 0
    for variable in get_global_variables():
        total += get_variable_size(variable)
    return total
|
||||
|
||||
|
||||
def create_variable(name, type, dimensions):
    # return variable named tuple
    #
    # NOTE(review): this builds a brand-new namedtuple *class* on every call
    # and assigns attributes on the class object itself - it never creates an
    # instance. The declared field list ("size", no "dimensions") does not
    # match the attributes actually assigned below. It works only because
    # class attributes are freely mutable (callers also set cell_index later).
    # A plain mutable attribute container would be clearer - verify against
    # FunctionCompiler's usage before changing.
    variable = namedtuple("variable", ["name", "type", "size", "cell_index"])

    variable.name = name
    variable.type = type
    variable.dimensions = dimensions  # list of array dimensions sizes (for non-arrays it will be [1])
    variable.cell_index = None  # will be updated when we insert this variable into an ids map

    return variable
|
||||
|
||||
|
||||
def get_variable_size(variable):
    """Return the total number of cells the variable occupies (product of its dimensions)."""
    return dimensions_to_size(variable.dimensions)
|
||||
|
||||
|
||||
def get_variable_dimensions(variable):
    """Return the list of dimension sizes for *variable* ([1] for scalars)."""
    return variable.dimensions
|
||||
|
||||
|
||||
def is_variable_array(variable):
    """Return True when the variable occupies more than a single scalar slot."""
    dims = variable.dimensions
    return dims != [1]
|
||||
|
||||
|
||||
def create_variable_from_definition(parser, index=None, advance_tokens=False):
    """
    processes the variable definition at index, and returns the variable named tuple
    if index is None, then assumes we start at the current_token_index
    if advance_tokens is True, then modifies current_token_index accordingly using parser.advance_token()

    Expected token shape: INT ID (LBRACK NUM RBRACK)*
    """

    if index is None:
        index = parser.current_token_index

    assert parser.tokens[index].type == Token.INT

    parser.check_next_token_is(Token.ID, starting_index=index)
    ID = parser.tokens[index + 1].data

    if advance_tokens:
        parser.advance_token(amount=2)  # skip INT ID

    if parser.tokens[index + 2].type == Token.LBRACK:  # array (support multi-dimensional arrays)
        dimensions = []  # element[i] holds the size of dimension[i]
        # consume one LBRACK NUM RBRACK group per dimension; `index` is bumped
        # locally even when advance_tokens is False so lookahead stays aligned
        while parser.tokens[index + 2].type == Token.LBRACK:
            parser.check_next_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK], starting_index=index + 1)
            dimensions.append(get_NUM_token_value(parser.tokens[index + 3]))

            if advance_tokens:
                parser.advance_token(amount=3)  # skip LBRACK NUM RBRACK
            index += 3
    else:
        dimensions = [1]  # scalar

    return create_variable(ID, Token.INT, dimensions)
|
||||
|
|
@ -1,233 +0,0 @@
|
|||
import re
|
||||
|
||||
from .Optimizer import optimize
|
||||
from .Token import Token
|
||||
|
||||
|
||||
class LexicalErrorException(Exception):
    """Raised when the lexer meets a character that matches no token rule."""
|
||||
|
||||
|
||||
def analyze(text):
    """
    :returns list of tokens in the text
    raises exception in case of lexical error

    Longest-match tokenizer: every rule is tried at the current offset and the
    longest match wins; ties go to the rule defined first in the list below.
    """

    # (regex, token type) pairs; order matters only for equal-length matches
    rules = [
        (r"\s+", Token.WHITESPACE),
        ("void", Token.VOID),
        ("int", Token.INT),
        ("bool", Token.INT),  # treat bool as int
        ("char", Token.INT),  # treat char as int
        ("true", Token.TRUE),
        ("false", Token.FALSE),
        ("&&", Token.AND),
        (r"\|\|", Token.OR),
        (r"\!", Token.NOT),
        ("return", Token.RETURN),
        ("if", Token.IF),
        ("else", Token.ELSE),
        ("while", Token.WHILE),
        ("for", Token.FOR),
        ("do", Token.DO),
        ("print", Token.PRINT),
        ("switch", Token.SWITCH),
        ("case", Token.CASE),
        ("default", Token.DEFAULT),
        ("break", Token.BREAK),
        ("continue", Token.CONTINUE),  # todo
        (":", Token.COLON),
        (";", Token.SEMICOLON),
        (",", Token.COMMA),
        (r"\(", Token.LPAREN),
        (r"\)", Token.RPAREN),
        (r"\{", Token.LBRACE),
        (r"\}", Token.RBRACE),
        (r"\[", Token.LBRACK),
        (r"\]", Token.RBRACK),
        (r"=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=", Token.ASSIGN),
        (r"\?", Token.TERNARY),
        (r"<=|>=|==|!=|<|>", Token.RELOP),
        (r"\+\+", Token.INCREMENT),
        ("--", Token.DECREMENT),
        (r"\+|-|\*|/|%", Token.BINOP),
        (r"\*\*|//|%%", Token.UNARY_MULTIPLICATIVE),
        ("<<|>>", Token.BITWISE_SHIFT),
        ("~", Token.BITWISE_NOT),
        ("&", Token.BITWISE_AND),
        (r"\|", Token.BITWISE_OR),
        (r"\^", Token.BITWISE_XOR),
        ("([a-zA-Z_][a-zA-Z0-9_]*)", Token.ID),
        (r"(\d+)", Token.NUM),
        (r"(0x[A-Fa-f\d]+)", Token.NUM),  # hexadecimal number
        ("(0o[0-7]+)", Token.NUM),  # octal number
        ("(0b[01]+)", Token.NUM),  # binary number
        (r'\"(\\\"|[^"])*"', Token.STRING),
        (r"\'(\\\'|(\\)?[^\'])\'", Token.CHAR),
        ("//.*(\\n|$)", Token.COMMENT),
        (r"/\*[\s\S]*?\*/", Token.COMMENT),  # multiline comments
        (".", Token.UNIDENTIFIED),  # catch-all; any hit raises below
    ]

    # compile once up front; matching runs once per rule per input offset
    rules = [(re.compile(r), t) for r, t in rules]

    tokens = []

    # create a mapping of [line number] to [offset of that line from the beginning of the text]
    newline = re.compile("\n")
    lines = [0] + [m.end() for m in re.finditer(newline, text)]

    i = 0
    while i < len(text):
        # collect every rule that matches at offset i
        current_matches = []
        for regex, token_type in rules:
            m = regex.match(text, i)
            if m:
                current_matches.append((m, token_type))

        # pick the token that fits the longest match
        # if tie - pick the one defined first in the rules list
        longest_match, max_i, matched_token = None, i, None
        for match, token_type in current_matches:
            if match.end() > max_i:
                longest_match, max_i, matched_token = match, match.end(), token_type

        # calculate line and column
        line, column = None, None
        for line_idx in range(len(lines) - 1):
            if lines[line_idx] <= longest_match.start() < lines[line_idx + 1]:
                line, column = line_idx + 1, (longest_match.start() - lines[line_idx]) + 1  # humans count from 1 :)
                break
        if not line:
            # match lies on the last line (past the final recorded newline)
            line, column = len(lines), (longest_match.start() - lines[-1]) + 1

        if matched_token in [Token.COMMENT, Token.WHITESPACE]:
            pass  # do nothing
        elif matched_token == Token.UNIDENTIFIED:
            raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column))
        elif matched_token in [Token.STRING, Token.CHAR]:
            # remove quotes at beginning and end, un-escape characters
            tokens.append(
                Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape"))
            )
        elif matched_token in [
            Token.NUM,
            Token.ID,
            Token.BINOP,
            Token.RELOP,
            Token.ASSIGN,
            Token.UNARY_MULTIPLICATIVE,
            Token.BITWISE_SHIFT,
        ]:
            # token types whose exact matched text is needed later
            tokens.append(Token(matched_token, line, column, longest_match.group()))
        else:
            tokens.append(Token(matched_token, line, column))
        i = longest_match.end()

    return tokens
|
||||
|
||||
|
||||
def tests():
    """Self-tests for the lexer (and the token-level optimizer)."""

    def test1():
        # test token priorities: INT should not be confused with ID even if ID contains "int"
        text = "my international int ; int; pints; international;"
        res = analyze(text)

        expected = [
            Token.ID,
            Token.ID,
            Token.INT,
            Token.SEMICOLON,
            Token.INT,
            Token.SEMICOLON,
            Token.ID,
            Token.SEMICOLON,
            Token.ID,
            Token.SEMICOLON,
        ]
        assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))

    def test2():
        # boolean operators vs identifiers that merely contain keywords
        text = "true !||!false falsek k||y+-a&&x"
        res = analyze(text)

        expected = [
            Token.TRUE,
            Token.NOT,
            Token.OR,
            Token.NOT,
            Token.FALSE,
            Token.ID,
            Token.ID,
            Token.OR,
            Token.ID,
            Token.BINOP,
            Token.BINOP,
            Token.ID,
            Token.AND,
            Token.ID,
        ]
        assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))

    def test3():
        # constant folding by optimize(): token lists collapse to single NUMs
        text = "1+2"
        tokens = analyze(text)
        expected = [Token.NUM, Token.BINOP, Token.NUM]
        assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
        optimize(tokens)
        assert len(tokens) == 1 and tokens[0].type == Token.NUM and tokens[0].data == "3"

        text = "1+2+3"
        tokens = analyze(text)
        expected = [Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM]
        assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
        optimize(tokens)
        assert len(tokens) == 1 and tokens[0].type == Token.NUM and tokens[0].data == "6"

        # make sure it is not optimized to 9 (3*3)
        text = "1+2*3"
        tokens = analyze(text)
        expected = [Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM]
        assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
        optimize(tokens)
        assert len(tokens) == 1 and tokens[0].type == Token.NUM and tokens[0].data == "7"

        # test all arithmetic operations
        text = "(1+2*3/6)+(1%3)*(6-1)"
        tokens = analyze(text)
        expected = [
            Token.LPAREN,
            Token.NUM,
            Token.BINOP,
            Token.NUM,
            Token.BINOP,
            Token.NUM,
            Token.BINOP,
            Token.NUM,
            Token.RPAREN,
            Token.BINOP,
            Token.LPAREN,
            Token.NUM,
            Token.BINOP,
            Token.NUM,
            Token.RPAREN,
            Token.BINOP,
            Token.LPAREN,
            Token.NUM,
            Token.BINOP,
            Token.NUM,
            Token.RPAREN,
        ]
        assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
        optimize(tokens)
        # only fully-numeric subexpressions fold; parenthesized results remain
        assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5"

    # todo find a better way to test?
    test1()
    test2()
    test3()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this module directly to execute the lexer self-tests.
    tests()
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
from .Functions import insert_function_object
|
||||
from .Token import Token
|
||||
|
||||
|
||||
class LibraryFunctionCompiler:
    """A built-in (library) function.

    Unlike a regular FunctionCompiler it holds ready-made Brainfuck code
    instead of compiling it from tokens, so get_code() simply returns it.
    """

    def __init__(self, name, type, parameters, code):
        self.name, self.type = name, type
        self.parameters, self.code = parameters, code

    def get_code(self, current_stack_pointer):
        # Built-in code is pre-generated; the stack pointer is ignored.
        return self.code
|
||||
|
||||
|
||||
def get_readint_code():
    """Return Brainfuck code that reads a decimal integer from input.

    Cell layout: res, tmp, input, loop (tmp is used for multiplication).
    Equivalent pseudo-code::

        res = 0
        loop = 1
        while loop:
            loop = 0
            input = input()
            if input != newline:  # todo add a eof check as well. run it in several interpreters to look for common ways for "end of number" input
                loop = 1
                res = res * 10 + char_to_digit(input)
    """
    pieces = [
        "[-]",                # clear res = 0
        ">[-]",               # tmp = 0
        ">>[-]+",             # loop = 1
        "[",                  # while loop == 1
        "[-]",                # loop = 0
        "<",                  # point to input
        ",",                  # input character
        "----------",         # sub 10 (check for newline)
        "[",                  # if input is not newline
        ">",                  # point to loop
        "+",                  # loop = 1
        # multiply res by 10 and add the input digit
        "<<<",                # point to res
        "[>+<-]",             # move res to tmp
        ">",                  # point to tmp
        "[<++++++++++>-]",    # res = tmp * 10, tmp = 0
        ">",                  # point to input
        # convert character to a digit by subtracting 0x30 from it
        # (we already subtracted 10 before)
        "-" * (0x30 - 10),
        "[<<+>>-]",           # res += input
        "]",                  # end if
        ">",                  # point to loop
        "]",                  # end while
        "<<<",                # point to res
    ]
    return "".join(pieces)
|
||||
|
||||
|
||||
def get_printint_code():
|
||||
# return_cell value_to_print_cell
|
||||
|
||||
code = ">" # point to value_to_print cell
|
||||
code += ">[-]" * 8 + "<" * 8 # zero some cells
|
||||
|
||||
code += ">++++++++++<" # div amount
|
||||
code += "[->-[>+>>]>[+[<+>-]>+>>]<<<<<]" # value_to_print/10
|
||||
code += ">[-]" # zero d-n%d
|
||||
code += ">>" # point to div result
|
||||
|
||||
code += ">++++++++++<" # div amount
|
||||
code += "[->-[>+>>]>[+[<+>-]>+>>]<<<<<]" # res/10
|
||||
code += ">[-]" # zero d-n%d
|
||||
code += ">>" # point to div result
|
||||
|
||||
code += "[" # if the first digit is not 0
|
||||
code += ">++++++[<++++++++>-]<." # add 48 to the first digit and print it
|
||||
code += "<<"
|
||||
code += "+>" # set is_over_100 to true
|
||||
code += "+>" # add 1 to the second digit so it prints even when it's 0
|
||||
code += "[-]" # zero the first digit
|
||||
code += "]" # end if
|
||||
|
||||
code += "<" # point to the second digit
|
||||
|
||||
code += "[" # if the second digit is not 0
|
||||
code += "<[>-<-]" # if is_over_100 is true then subtract 1 from the second digit
|
||||
code += "++++++[>++++++++<-]>." # add 48 to the second digit and print it
|
||||
code += "[-]" # zero the second digit
|
||||
code += "]" # end if
|
||||
|
||||
code += "<<" # point to the cell after the third digit
|
||||
code += "++++++[<++++++++>-]<." # add 48 to the third digit and print it
|
||||
code += "[-]" # zero the third digit
|
||||
code += "<<" # point to value_to_print_cell which is 0
|
||||
|
||||
code += "<" # point to return_cell
|
||||
return code
|
||||
|
||||
|
||||
def get_readchar_code():
    """Return Brainfuck code that reads one character of input."""
    # ',' reads directly into the "return value cell"; no pointer movement needed.
    return ","
|
||||
|
||||
|
||||
def get_printchar_code():
    """Return Brainfuck code that prints the single parameter cell."""
    # Step onto the parameter, output it, step back to the "return value cell".
    return ">.<"
|
||||
|
||||
|
||||
def insert_library_functions():
    """Register the built-in I/O functions: readint, printint, readchar, printchar."""
    builtins = [
        ("readint", Token.INT, [], get_readint_code()),
        ("printint", Token.VOID, [Token.INT], get_printint_code()),
        ("readchar", Token.INT, [], get_readchar_code()),
        ("printchar", Token.VOID, [Token.INT], get_printchar_code()),
    ]
    for name, return_type, parameters, code in builtins:
        insert_function_object(LibraryFunctionCompiler(name, return_type, parameters, code))
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
def minify(code):
|
||||
old_code = ""
|
||||
|
||||
while old_code != code:
|
||||
old_code = code
|
||||
|
||||
code = code.replace("><", "")
|
||||
code = code.replace("<>", "")
|
||||
code = code.replace("+-", "")
|
||||
code = code.replace("-+", "")
|
||||
|
||||
code = code.replace("][-]", "]")
|
||||
|
||||
return code
|
||||
|
|
@ -1,437 +0,0 @@
|
|||
from .Exceptions import BFSemanticError
|
||||
from .General import (
|
||||
get_copy_from_variable_code,
|
||||
get_copy_to_variable_code,
|
||||
get_literal_token_code,
|
||||
get_move_left_index_cell_code,
|
||||
get_move_right_index_cells_code,
|
||||
get_offset_to_variable,
|
||||
get_op_between_literals_code,
|
||||
get_op_boolean_operator_code,
|
||||
get_token_ID_code,
|
||||
get_unary_postfix_op_code,
|
||||
get_unary_prefix_op_code,
|
||||
get_variable_dimensions_from_token,
|
||||
is_token_literal,
|
||||
unpack_literal_tokens_to_array_dimensions,
|
||||
)
|
||||
from .Token import Token
|
||||
|
||||
"""
|
||||
This file holds classes that are used to create the parse tree of expressions
|
||||
Each class implements a get_code() function that receives a "stack pointer" and returns code that evaluates the expression
|
||||
"""
|
||||
|
||||
|
||||
class Node:
    """Base class for expression parse-tree nodes.

    Each node keeps a snapshot of ids_map_list as it was when the expression
    was parsed, and implements get_code(), which receives a "stack pointer"
    and returns Brainfuck code that evaluates the expression.
    """

    def __init__(self, ids_map_list):
        # copy, so later scope changes don't affect this node's view
        self.ids_map_list = ids_map_list[:]

    def assign_token_to_op_token(self, assign_token):
        """Translate a compound-assignment token (e.g. '+=') into a NodeToken
        wrapping the corresponding operator token (e.g. BINOP '+')."""
        assert assign_token.data in ["+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^="]

        line, column = assign_token.line, assign_token.column
        binop_ops = {"+=": "+", "-=": "-", "*=": "*", "/=": "/", "%=": "%"}
        shift_ops = {"<<=": "<<", ">>=": ">>"}
        bitwise_types = {"&=": Token.BITWISE_AND, "|=": Token.BITWISE_OR, "^=": Token.BITWISE_XOR}

        op = assign_token.data
        if op in binop_ops:
            op_token = Token(Token.BINOP, line, column, data=binop_ops[op])
        elif op in shift_ops:
            op_token = Token(Token.BITWISE_SHIFT, line, column, data=shift_ops[op])
        else:
            op_token = Token(bitwise_types[op], line, column)

        return NodeToken(self.ids_map_list, token=op_token)

    def get_code(self, *args, **kwargs):
        # abstract: subclasses return Brainfuck code evaluating this node
        pass
|
||||
|
||||
|
||||
class NodeToken(Node):
    """A parse-tree node built around a single token.

    Acts both as a leaf (literal / identifier, left and right are None) and
    as an inner binary node (operator token with left/right children).
    """

    def __init__(self, ids_map_list, left=None, token=None, right=None):
        Node.__init__(self, ids_map_list)
        self.left = left
        self.right = right
        self.token = token

    def get_code(self, current_pointer, *args, **kwargs):
        # returns the code that evaluates the parse tree

        if is_token_literal(self.token) or self.token.type == Token.ID:
            # its a literal (leaf)
            assert self.left is None and self.right is None
            if self.token.type == Token.ID:
                return get_token_ID_code(self.ids_map_list, self.token, current_pointer)
            else:
                return get_literal_token_code(self.token)

        elif self.token.type in [
            Token.BINOP,
            Token.RELOP,
            Token.BITWISE_SHIFT,
            Token.BITWISE_AND,
            Token.BITWISE_OR,
            Token.BITWISE_XOR,
        ]:
            # evaluate operands into two adjacent cells, then combine them
            code = self.left.get_code(current_pointer)
            code += self.right.get_code(current_pointer + 1)
            code += "<<"  # point to the first operand

            # the right operand's token (when it is a simple NodeToken) lets
            # the code generator special-case literal right-hand sides
            right_token = None
            if isinstance(self.right, NodeToken):
                right_token = self.right.token

            code += get_op_between_literals_code(self.token, right_token)
            return code

        elif self.token.type in [Token.AND, Token.OR]:  # short-circuit evaluation treated differently
            return get_op_boolean_operator_code(self, current_pointer)

        elif self.token.type == Token.ASSIGN:
            assert self.left.token.type == Token.ID

            if self.token.data == "=":
                # id = expression
                code = self.right.get_code(current_pointer)

                # create code to copy from evaluated expression to ID's cell
                code += "<"  # point to evaluated expression cell
                code += get_copy_to_variable_code(self.ids_map_list, self.left.token, current_pointer)
                code += ">"  # point to next available cell

                return code

            else:
                assert self.token.data in ["+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^="]
                # id += expression
                # create a node for id + expression

                op_node = self.assign_token_to_op_token(self.token)
                op_node.left = self.left
                op_node.right = self.right

                # create a node for id = id + expression
                assign_token = Token(Token.ASSIGN, self.token.line, self.token.column, data="=")
                assignment_node = NodeToken(self.ids_map_list, left=self.left, token=assign_token, right=op_node)

                # delegate to the plain-assignment branch above
                return assignment_node.get_code(current_pointer)
|
||||
|
||||
|
||||
class NodeTernary(Node):
    """Parse-tree node for the ternary conditional: condition ? node_true : node_false."""

    def __init__(self, ids_map_list, condition, node_true, node_false):
        # node_condition ? node_true : node_false;
        Node.__init__(self, ids_map_list)
        self.condition = condition
        self.node_true = node_true
        self.node_false = node_false

    def get_code(self, current_pointer, *args, **kwargs):
        """Return Brainfuck code evaluating the ternary into the result cell.

        Exactly one branch runs: taking the true branch zeroes the
        bool_evaluate_node_false flag so the false branch is skipped.
        """
        # cells layout:
        # result, bool_evaluate_node_false, condition
        code = ">"  # point to bool_evaluate_node_false
        code += "[-]+"  # bool_evaluate_node_false=1
        code += ">"  # point to condition
        code += self.condition.get_code(current_pointer + 2)  # evaluate condition
        code += "<"  # point to condition

        code += "["  # if condition is non-zero
        code += "<<"  # point to result
        code += self.node_true.get_code(current_pointer)  # evaluate node_true
        # now we point to bool_evaluate_node_false
        code += "[-]"  # zero bool_evaluate_node_false
        code += ">"  # point to condition
        code += "[-]"  # zero condition
        code += "]"  # end if

        code += "<"  # point to bool_evaluate_node_false
        code += "["  # if bool_evaluate_node_false is 1
        code += "<"  # point to result
        code += self.node_false.get_code(current_pointer)  # evaluate node_false
        # now we point to bool_evaluate_node_false
        code += "[-]"  # zero bool_evaluate_node_false
        code += "]"  # end if
        # now we point to one cell after result - next available cell
        return code
|
||||
|
||||
|
||||
class NodeUnaryPrefix(Node):
    """AST node for an unary prefix operation (e.g. !x, ~x, ++x, --x)."""

    def __init__(self, ids_map_list, operation, literal):
        Node.__init__(self, ids_map_list)
        self.token_operation = operation  # the prefix operator token
        self.node_literal = literal  # the operand node

    def get_code(self, current_pointer, *args, **kwargs):
        # unary prefix (!x or ++x or ~x or -x)
        assert self.token_operation.type in [
            Token.NOT,
            Token.INCREMENT,
            Token.DECREMENT,
            Token.UNARY_MULTIPLICATIVE,
            Token.BITWISE_NOT,
            Token.BINOP,
        ]

        if self.token_operation.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
            # value-producing operators: evaluate the operand, then apply the operator to it
            code = self.node_literal.get_code(current_pointer)
            code += "<"  # point to operand
            code += get_unary_prefix_op_code(self.token_operation)

            return code
        else:
            # its INCREMENT or DECREMENT (or UNARY_MULTIPLICATIVE): modifies a variable in place
            if isinstance(self.node_literal, NodeArrayGetElement):
                # apply the operator directly to an array element at a runtime-computed index
                token_id, index_node = self.node_literal.token_id, self.node_literal.node_expression
                code = get_move_right_index_cells_code(current_pointer, index_node)

                offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + 2)
                # it is +2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value

                code += get_unary_prefix_op_code(self.token_operation, offset_to_array)

                code += "<"  # point to res
                code += "[<<+>>-]"  # move res to old "index cell"
                code += "<"  # point to new index cell

                code += get_move_left_index_cell_code()
                return code

            # the token to apply on must be an ID
            if isinstance(self.node_literal, NodeToken) is False:
                raise BFSemanticError(
                    "Prefix operator %s can only be applied to a variable" % str(self.token_operation)
                )

            if self.node_literal.token.type != Token.ID:
                raise BFSemanticError(
                    "Prefix operator %s cannot be applied to %s, but only to a variable"
                    % (str(self.token_operation), str(self.node_literal.token))
                )

            offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
            return get_unary_prefix_op_code(self.token_operation, offset_to_ID)
|
||||
|
||||
|
||||
class NodeUnaryPostfix(Node):
    """AST node for an unary postfix operation applied to a variable (e.g. x++, x--)."""

    def __init__(self, ids_map_list, operation, literal):
        Node.__init__(self, ids_map_list)
        self.token_operation = operation  # the postfix operator token
        self.node_literal = literal  # the operand node (must name a variable or array element)

    def get_code(self, current_pointer, *args, **kwargs):
        # its an unary postfix operation (x++)
        assert self.token_operation.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]

        if isinstance(self.node_literal, NodeArrayGetElement):
            # apply the operator directly to an array element at a runtime-computed index
            token_id, index_node = self.node_literal.token_id, self.node_literal.node_expression
            code = get_move_right_index_cells_code(current_pointer, index_node)

            offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + 2)
            # it is +2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value

            code += get_unary_postfix_op_code(self.token_operation, offset_to_array)

            code += "<"  # point to res
            code += "[<<+>>-]"  # move res to old "index cell"
            code += "<"  # point to new index cell

            code += get_move_left_index_cell_code()
            return code

        # the token to apply on must be an ID
        if isinstance(self.node_literal, NodeToken) is False:
            raise BFSemanticError("Postfix operator %s can only be applied to a variable" % str(self.token_operation))

        if self.node_literal.token.type != Token.ID:
            raise BFSemanticError(
                "Postfix operator %s cannot be applied to %s, but only to a variable"
                % (str(self.token_operation), str(self.node_literal.token))
            )

        offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
        return get_unary_postfix_op_code(self.token_operation, offset_to_ID)
|
||||
|
||||
|
||||
class NodeFunctionCall(Node):
    """AST node for a function call expression."""

    def __init__(self, ids_map_list, function_to_call, parameters):
        """
        :param function_to_call: a FunctionCompiler object exposing get_code(),
            which receives a stack pointer and returns Brainfuck code
        :param parameters: list of Node objects; each one receives a stack pointer
            and returns code that evaluates the corresponding argument
        """
        Node.__init__(self, ids_map_list)
        self.function_to_call = function_to_call
        self.parameters = parameters

    def get_code(self, current_pointer, *args, **kwargs):
        """Return Brainfuck code that evaluates the call and leaves the pointer
        one cell after the return value."""
        pieces = ["[-]>"]  # return_value_cell=0, then point past it

        # evaluate parameters left to right, placing them on the "stack" in that order;
        # each parameter's code leaves the pointer on the next available cell
        for arg_offset, param_node in enumerate(self.parameters, start=1):
            # each parameter lives at its own cell offset (starting one after return_value_cell)
            pieces.append(param_node.get_code(current_pointer + arg_offset))

        # pointer is now one past the last parameter; rewind past all parameters
        # and the return_value_cell so the callee starts at the right spot
        pieces.append("<" * (len(self.parameters) + 1))
        # the called function leaves the pointer on the return value cell
        pieces.append(self.function_to_call.get_code(current_stack_pointer=current_pointer))
        pieces.append(">")  # point to next available cell (one after return value)
        return "".join(pieces)
|
||||
|
||||
|
||||
class NodeArrayElement(Node):
    """
    Base class for nodes that access an array element at a runtime-computed index.

    the idea:
    1. evaluate index. it is known only in run time, so we need to perform a little trick
    2. move <index> steps to the right, while counting how many steps we moved so far
       hold an index, and a steps_counter, and move them to the right while decreasing index and increasing steps_counter
       e.g: 4,0 --> 3,1 --> 2,2 --> 1,3 --> 0,4
       (move right until index is 0. counter will hold the old index)
       this way we know we moved <index> steps, and know how many steps to go back when we are done
    3. move <offset from stack pointer to array> steps left, to get/set the relevant array element
       this offset is known at compilation time
    """

    def __init__(self, ids_map_list):
        Node.__init__(self, ids_map_list)
|
||||
|
||||
|
||||
class NodeArrayGetElement(NodeArrayElement):
    """
    class for getting element of a one-dimensional array
    it receives an expression, indicating the required index
    and returns a code that gets that element
    """

    def __init__(self, ids_map_list, token_id, node_expression):
        Node.__init__(self, ids_map_list)
        self.token_id = token_id  # ID token of the array variable
        self.node_expression = node_expression  # Node evaluating the element index

    def get_code(self, current_pointer, *args, **kwargs):
        # walk <index> cells right (tracking the distance), copy the element, then walk back
        code = get_move_right_index_cells_code(current_pointer, self.node_expression)
        code += get_copy_from_variable_code(self.ids_map_list, self.token_id, current_pointer + 2)
        # it is +2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value

        code += "<"  # point to res
        code += "[<<+>>-]"  # move res to old "index cell"
        code += "<"  # point to new index cell

        code += get_move_left_index_cell_code()
        return code
|
||||
|
||||
|
||||
class NodeArraySetElement(NodeArrayElement):
    """
    class for setting element of a one-dimensional array
    it receives:
    1. an expression, indicating the required index
    2. assignment operator (=|+=|-=|*=|/=|%=|<<=|>>=|&=|(|=)|^=)
    3. an expression, indicating the value to be used for the assignment
    and returns a code that gets that element
    """

    def __init__(self, ids_map_list, token_id, node_expression_index, assign_token, node_expression_value):
        Node.__init__(self, ids_map_list)
        self.token_id = token_id  # ID token of the array variable
        self.node_expression_index = node_expression_index  # Node evaluating the element index

        if assign_token.data == "=":
            # id[exp] = expression
            self.assign_token = assign_token
            self.node_expression_value = node_expression_value

        else:
            # id[exp] += expression
            assert assign_token.data in ["+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^="]

            # rewrite the compound assignment as a plain one: id[exp] = (id[exp] <op> expression)
            self.assign_token = Token(Token.ASSIGN, assign_token.line, assign_token.column, data="=")

            # create a node for id[exp] + expression
            op_node = self.assign_token_to_op_token(assign_token)
            op_node.left = NodeArrayGetElement(self.ids_map_list, token_id, node_expression_index)
            op_node.right = node_expression_value

            self.node_expression_value = op_node

    def get_code(self, current_pointer, *args, **kwargs):
        """Return Brainfuck code writing the evaluated value into the array element.

        Cell layout starting at current_pointer:
        index, steps_taken_counter, value
        The (index, counter, value) triple is shifted right <index> cells, the
        value is stored into the array, then the pointer walks back left.
        """
        # index, steps_taken_counter, value

        code = self.node_expression_index.get_code(current_pointer)
        code += "[-]"  # counter = 0
        code += ">"  # point to value cell
        code += self.node_expression_value.get_code(current_pointer + 2)
        code += "<<<"  # point to index

        code += "["  # while index != 0
        code += ">>>"  # point to new_value (one after current value)
        code += "[-]"  # zero new_value
        code += "<"  # move to old value
        code += "[>+<-]"  # move old value one cell right (into new_value)
        code += "<"  # point to old counter
        code += "+"  # increase old counter
        code += "[>+<-]"  # move old counter to new counter
        code += "<"  # point to old index
        code += "-"  # decrease old index
        code += "[>+<-]"  # move old index to new index
        code += ">"  # point to new index
        code += "]"  # end while

        code += ">>"  # point to value
        code += get_copy_to_variable_code(self.ids_map_list, self.token_id, current_pointer + 2)
        # it is +2 because we moved 2 extra cells to the right, for pointing to value

        # layout: 0, idx, value (pointing to value)
        # create layout: value, idx
        code += "[<<+>>-]"  # move value to old "index" cell (which is now 0)

        # value, index (pointing to one after index)
        code += "<"  # point to index
        code += "["  # while index != 0
        code += "<"  # point to value
        code += "[<+>-]"  # move value to the left
        code += ">"  # point to index
        code += "-"  # sub 1 from index
        code += "[<+>-]"  # move index to left
        code += "<"  # point to index
        code += "]"  # end while

        # now value is at the desired cell, and we point to the next available cell

        return code
|
||||
|
||||
|
||||
class NodeArrayAssignment(Node):
    """
    Used for whole-array assignment from an initialization list
    E.g arr = { 1, 2, 3... }
    """

    def __init__(self, ids_map_list, token_id, literal_tokens_list):
        Node.__init__(self, ids_map_list)
        self.token_id = token_id  # ID token of the array being assigned
        self.literal_tokens_list = literal_tokens_list  # (possibly nested) list of literal tokens

    def get_code(self, current_pointer, *args, **kwargs):
        # flatten the (possibly nested) literal list so it matches the array's dimensions
        array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id)
        unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
            self.token_id, array_dimensions, self.literal_tokens_list
        )

        offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer)
        code = "<" * offset  # point to first array element
        for literal in unpacked_literals_list:
            code += get_literal_token_code(literal)  # evaluate this literal and point to next array element
        code += ">" * (offset - len(unpacked_literals_list))  # move back to the original position
        code += ">"  # point to the next cell
        return code
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
from .General import get_NUM_token_value
|
||||
from .Token import Token
|
||||
|
||||
"""
|
||||
This file holds functions that optimize code on syntax-level. For example:
|
||||
The tokens corresponding to the code "3*5" will be replaced in-place by a token that represents "15"
|
||||
"""
|
||||
|
||||
|
||||
def optimize_once(tokens):
    # performs one pass on the tokens and optimizes them in-place if possible
    # optimization based on a list of rules

    def optimize_binop(tokens, start_index):
        # optimize arithmetic operations. E.g replace 1+2 with 3
        # returns True if tokens were modified, False otherwise

        # need to be careful not to optimize (1+2*3) to (3*3):
        # only fold when this operator is */%, or when the NEXT operator is not */%
        if (
            tokens[start_index + 1].data in ["*", "/", "%"]
            or (start_index + 3 >= len(tokens))
            or (tokens[start_index + 3].data not in ["*", "/", "%"])
        ):
            num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index + 2])
            op = tokens[start_index + 1].data
            if op == "+":
                val = num1 + num2
            elif op == "-":
                val = num1 - num2
                if val < 0:  # cannot optimize negative values
                    return False
            elif op == "*":
                val = num1 * num2
            elif op in ["/", "%"]:
                if num2 == 0:
                    # leave the division by zero in place; just warn at compile time
                    print("WARNING (optimizer) - division by zero at %s" % str(tokens[start_index]))
                    return False
                if op == "/":
                    val = num1 // num2
                else:
                    val = num1 % num2
            else:
                raise NotImplementedError(op)

            # remove the 3 old tokens and replace them with new one
            # (the originals are kept on the new token for error reporting)
            new_token = Token(
                Token.NUM,
                tokens[start_index].line,
                tokens[start_index].column,
                data=str(val),
                original_tokens=tokens[start_index : start_index + 3],
            )

            for _ in range(3):
                tokens.pop(start_index)
            tokens.insert(start_index, new_token)
            return True

        return False

    def optimize_printint(tokens, start_index):
        # replace printint(50) with print("50")
        # since printing strings compiles into less Brainfuck code than printing ints
        # returns True if tokens were modified, False otherwise
        if tokens[start_index].data == "printint":
            tokens[start_index] = Token(
                Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]]
            )
            tokens[start_index + 2] = Token(
                Token.STRING,
                tokens[start_index].line,
                tokens[start_index].column,
                data=str(tokens[start_index + 2].data),
                original_tokens=[tokens[start_index + 2]],
            )
            return True

        return False

    # each rule: (token-type pattern to match, handler that rewrites tokens in place)
    rules = [
        ([Token.NUM, Token.BINOP, Token.NUM], optimize_binop),  # arithmetic operations
        ([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint),  # printint(50) to print("50")
    ]

    # try to match one of the rules to the tokens in a "sliding window" style
    i = 0
    while i < len(tokens):
        optimized = False
        for tokens_sequence, optimization_function in rules:
            if i + len(tokens_sequence) <= len(tokens):
                if all(tokens_sequence[n] == tokens[i + n].type for n in range(len(tokens_sequence))):
                    if optimization_function(tokens, i):
                        optimized = True
        if optimized:
            continue  # don't increment i, try to optimize the same location again
        i += 1
|
||||
|
||||
|
||||
def optimize(tokens):
    """Repeatedly run optimize_once over *tokens* (in place) until a full
    pass leaves the token type sequence unchanged (a fixed point)."""
    snapshot = [t.type for t in tokens]
    changed = True
    while changed:
        optimize_once(tokens)
        print(".", end="")  # progress indicator: one dot per optimization pass
        new_snapshot = [t.type for t in tokens]
        changed = new_snapshot != snapshot
        snapshot = new_snapshot
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
from .Exceptions import BFSemanticError, BFSyntaxError
|
||||
from .General import is_token_literal
|
||||
from .Token import Token
|
||||
|
||||
|
||||
class Parser:
    """
    Used to easily iterate tokens
    """

    def __init__(self, tokens):
        self.tokens = tokens  # list of Token objects to iterate over
        self.current_token_index = 0  # index of the token currently pointed to

    # parsing tokens
    def current_token(self):
        # return the token at the current index, or None if we ran past the end
        if self.current_token_index >= len(self.tokens):
            return None
        else:
            return self.token_at_index(self.current_token_index)

    def advance_token(self, amount=1):
        # move the current index forward by <amount> tokens
        self.current_token_index += amount

    def advance_to_token_at_index(self, token_index):
        # jump the current index directly to <token_index>
        self.current_token_index = token_index

    def token_at_index(self, index):
        # return the token at <index> (must be within range)
        assert index < len(self.tokens)
        return self.tokens[index]

    def next_token(self, next_amount=1):
        # return the token <next_amount> positions after the current one, without advancing
        return self.token_at_index(self.current_token_index + next_amount)

    def find_matching(self, starting_index=None):
        """
        :return: the index of the token that matches the current token
        :param starting_index: (optional) the index of the token we want to match

        for example, if current token is {
        it returns the index of the matching }

        :raises BFSemanticError: if the token is not an opening brace/bracket/paren
        :raises BFSyntaxError: if no matching closer is found
        """
        if starting_index is None:
            starting_index = self.current_token_index

        tokens = self.tokens
        token_to_match = tokens[starting_index]
        if token_to_match.type == Token.LBRACE:
            inc = Token.LBRACE
            dec = Token.RBRACE
        elif token_to_match.type == Token.LBRACK:
            inc = Token.LBRACK
            dec = Token.RBRACK
        elif token_to_match.type == Token.LPAREN:
            inc = Token.LPAREN
            dec = Token.RPAREN
        else:
            raise BFSemanticError("No support for matching %s" % str(token_to_match))

        # scan forward keeping a nesting counter; the match is where it drops back to 0
        i = starting_index
        cnt = 0
        while i < len(tokens):
            if tokens[i].type == inc:
                cnt += 1
            elif tokens[i].type == dec:
                cnt -= 1

            if cnt == 0:
                return i

            i += 1

        raise BFSyntaxError("Did not find matching %s for %s" % (dec, str(token_to_match)))

    def check_next_tokens_are(self, tokens_list, starting_index=None):
        # verify the token types AFTER <starting_index> match <tokens_list>, in order
        if starting_index is None:
            starting_index = self.current_token_index

        # used for "assertion" and print a nice message to the user
        if starting_index + len(tokens_list) >= len(self.tokens):
            raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
        for i in range(0, len(tokens_list)):
            if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
                raise BFSyntaxError(
                    "Expected %s after %s"
                    % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index : starting_index + 1 + i]])
                )

    def check_next_token_is(self, token, starting_index=None):
        # verify the single token type after <starting_index> is <token>
        self.check_next_tokens_are([token], starting_index=starting_index)

    def check_current_tokens_are(self, tokens_list):
        # verify the token types starting AT the current index match <tokens_list>
        self.check_next_tokens_are(tokens_list, starting_index=self.current_token_index - 1)

    def check_current_token_is(self, token):
        # verify the current token's type is <token>
        self.check_current_tokens_are([token])

    def compile_array_initialization_list(self):
        # {1, 2, 3, ...} or {array_initialization_list, array_initialization_list, array_initialization_list, ...} or string
        # parses the definition and returns a list (of list of list ....) of literal tokens (NUM, CHAR, TRUE, FALSE)

        list_tokens = []

        if self.current_token().type == Token.STRING:
            # a string literal initializes a char array: one NUM token per character
            string_token = self.current_token()
            line, column = string_token.line, string_token.column
            for char in string_token.data:
                list_tokens.append(Token(Token.NUM, line, column, str(ord(char))))

            self.advance_token()  # point to after STRING
            return list_tokens

        assert self.current_token().type == Token.LBRACE
        self.advance_token()  # skip to after LBRACE

        while is_token_literal(self.current_token()) or self.current_token().type == Token.LBRACE:
            if self.current_token().type == Token.LBRACE:  # list of (literals | list)
                # nested initialization list - recurse
                list_tokens.append(self.compile_array_initialization_list())
            else:  # literal
                list_tokens.append(self.current_token())
                self.advance_token()  # skip literal

            if self.current_token().type not in [Token.COMMA, Token.RBRACE]:
                raise BFSyntaxError("Unexpected %s (expected comma (,) or RBRACE (}))" % self.current_token())

            if self.current_token().type == Token.COMMA:
                self.advance_token()  # skip comma
                if self.current_token().type == Token.RBRACE:
                    break  # allow a trailing comma before the closing brace

        self.check_current_token_is(Token.RBRACE)
        self.advance_token()  # skip RBRACE
        return list_tokens
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
class Token:
    """A lexical token: a type tag plus source position and optional payload."""

    # keywords and primitive types
    INT = "INT"
    VOID = "VOID"
    TRUE = "TRUE"
    FALSE = "FALSE"
    AND = "AND"
    OR = "OR"
    NOT = "NOT"
    RETURN = "RETURN"
    IF = "IF"
    ELSE = "ELSE"
    WHILE = "WHILE"
    FOR = "FOR"
    DO = "DO"
    BREAK = "BREAK"
    CONTINUE = "CONTINUE"
    SWITCH = "SWITCH"
    CASE = "CASE"
    DEFAULT = "DEFAULT"
    COLON = "COLON"
    SEMICOLON = "SEMICOLON"
    COMMA = "COMMA"

    # brackets
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    LBRACE = "LBRACE"
    RBRACE = "RBRACE"
    LBRACK = "LBRACK"
    RBRACK = "RBRACK"

    # operators
    ASSIGN = "ASSIGN"
    TERNARY = "TERNARY"
    RELOP = "RELOP"
    BINOP = "BINOP"
    INCREMENT = "INCREMENT"
    DECREMENT = "DECREMENT"
    UNARY_MULTIPLICATIVE = "UNARY_MULTIPLICATIVE"

    # bitwise operators
    BITWISE_SHIFT = "BITWISE_SHIFT"
    BITWISE_NOT = "BITWISE_NOT"
    BITWISE_AND = "BITWISE_AND"
    BITWISE_OR = "BITWISE_OR"
    BITWISE_XOR = "BITWISE_XOR"

    # literals and identifiers
    WHITESPACE = "WHITESPACE"
    ID = "ID"
    NUM = "NUM"
    STRING = "STRING"
    CHAR = "CHAR"

    # misc
    PRINT = "PRINT"
    COMMENT = "COMMENT"
    UNIDENTIFIED = "UNIDENTIFIED"

    def __init__(self, type, line, column, data=None, original_tokens=None):
        self.type = type  # one of the class-level type constants above
        self.line = line  # 1-based source line of the token
        self.column = column  # column within the line
        self.data = data  # optional payload (e.g. identifier name, numeric text)
        self.original_tokens = original_tokens  # tokens this one replaced (optimizer bookkeeping)

    def __str__(self):
        parts = [self.type]
        if self.data:
            parts.append(self.data)
        text = " ".join(parts)
        text += f" (line {self.line} column {self.column})"
        if self.original_tokens:
            originals = ", ".join(str(t) for t in self.original_tokens)
            text += " (original tokens: " + originals + ")"
        return text
|
||||
Loading…
Add table
Add a link
Reference in a new issue