This commit is contained in:
Andreas Koepf 2025-01-30 22:55:04 +01:00
parent 21c47db6c1
commit ebb88e6c6a
24 changed files with 1215 additions and 814 deletions

1143
GALLERY.md

File diff suppressed because it is too large Load diff

View file

@ -14,8 +14,8 @@ requires-python = ">=3.11"
dependencies = [ dependencies = [
"bfi==1.0.4", "bfi==1.0.4",
"cellpylib==2.4.0", "cellpylib==2.4.0",
"sympy>=1.13.1", "sympy>=1.13.1",
"magiccube==0.3.0", "magiccube==0.3.0",
"pyfiglet==1.0.2" "pyfiglet==1.0.2"
] ]
classifiers = [ classifiers = [

View file

@ -7,7 +7,4 @@ Cognition tasks for training reasoning capabilities:
from .bf import BFConfig, BFDataset from .bf import BFConfig, BFDataset
__all__ = [ __all__ = ["BFConfig", "BFDataset"]
"BFConfig",
"BFDataset"
]

View file

@ -3,10 +3,10 @@ from random import Random
from typing import Dict, Optional from typing import Dict, Optional
import bfi import bfi
from .contrib.bfit.Compiler import Compiler, Minify
from ..data.wordle_words import wordle_words from ..data.wordle_words import wordle_words
from ..factory import ProceduralDataset, register_dataset from ..factory import ProceduralDataset, register_dataset
from .contrib.bfit.Compiler import Compiler, Minify
@dataclass @dataclass
@ -122,10 +122,11 @@ int main() {{
if answer == None: if answer == None:
return 0.0 return 0.0
if answer != entry['answer']: if answer != entry["answer"]:
return 0.01 return 0.01
else: else:
return 1.0 # Yay return 1.0 # Yay
# Register the dataset # Register the dataset
register_dataset("bf", BFDataset, BFConfig) register_dataset("bf", BFDataset, BFConfig)

View file

@ -2,9 +2,9 @@
import argparse import argparse
import os import os
import Interpreter import Interpreter
from Compiler import Compiler from Compiler import Compiler, Minify
from Compiler import Minify
def process_args(): def process_args():
@ -54,5 +54,5 @@ def compile_file():
Interpreter.brainfuck(brainfuck_code) Interpreter.brainfuck(brainfuck_code)
if __name__ == '__main__': if __name__ == "__main__":
compile_file() compile_file()

View file

@ -1,12 +1,18 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from .Exceptions import BFSyntaxError, BFSemanticError from .Exceptions import BFSemanticError, BFSyntaxError
from .FunctionCompiler import FunctionCompiler from .FunctionCompiler import FunctionCompiler
from .Functions import check_function_exists, get_function_object, insert_function_object from .Functions import check_function_exists, get_function_object, insert_function_object
from .General import is_token_literal, get_literal_token_code, unpack_literal_tokens_to_array_dimensions from .General import get_literal_token_code, is_token_literal, unpack_literal_tokens_to_array_dimensions
from .Globals import get_global_variables_size, get_variable_size, get_variable_dimensions, insert_global_variable, create_variable_from_definition from .Globals import (
create_variable_from_definition,
get_global_variables_size,
get_variable_dimensions,
get_variable_size,
insert_global_variable,
)
from .Lexical_analyzer import analyze from .Lexical_analyzer import analyze
from .Optimizer import optimize
from .LibraryFunctionCompiler import insert_library_functions from .LibraryFunctionCompiler import insert_library_functions
from .Optimizer import optimize
from .Parser import Parser from .Parser import Parser
from .Token import Token from .Token import Token
@ -29,20 +35,24 @@ class Compiler:
# returns function named tuple # returns function named tuple
if self.parser.current_token().type not in [Token.VOID, Token.INT]: if self.parser.current_token().type not in [Token.VOID, Token.INT]:
raise BFSemanticError("Function return type can be either void or int, not '%s'" % str(self.parser.current_token())) raise BFSemanticError(
"Function return type can be either void or int, not '%s'" % str(self.parser.current_token())
)
self.parser.check_next_tokens_are([Token.ID, Token.LPAREN]) self.parser.check_next_tokens_are([Token.ID, Token.LPAREN])
# save all tokens of this function # save all tokens of this function
function_name = self.parser.next_token(next_amount=1).data function_name = self.parser.next_token(next_amount=1).data
RPAREN_index = self.parser.find_matching(starting_index=self.parser.current_token_index+2) # first find RPAREN RPAREN_index = self.parser.find_matching(
starting_index=self.parser.current_token_index + 2
) # first find RPAREN
self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index) self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index)
RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index+1) # then find RBRACE RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index + 1) # then find RBRACE
# take all tokens between INT and RBRACE and pass them to function object # take all tokens between INT and RBRACE and pass them to function object
function_tokens = self.parser.tokens[self.parser.current_token_index:RBRACE_index+1] function_tokens = self.parser.tokens[self.parser.current_token_index : RBRACE_index + 1]
# skip function definition # skip function definition
self.parser.advance_to_token_at_index(RBRACE_index+1) self.parser.advance_to_token_at_index(RBRACE_index + 1)
function = FunctionCompiler(function_name, function_tokens) function = FunctionCompiler(function_name, function_tokens)
return function return function
@ -60,12 +70,12 @@ class Compiler:
# if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code) # if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code)
ZERO_CELLS_BEFORE_USE = False ZERO_CELLS_BEFORE_USE = False
code = '[-]' if ZERO_CELLS_BEFORE_USE else '' code = "[-]" if ZERO_CELLS_BEFORE_USE else ""
if get_variable_size(variable) > 1: # its an array if get_variable_size(variable) > 1: # its an array
if self.parser.current_token().type == Token.SEMICOLON: if self.parser.current_token().type == Token.SEMICOLON:
# array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON # array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON
self.parser.advance_token() # skip SEMICOLON self.parser.advance_token() # skip SEMICOLON
code = (code + '>') * get_variable_size(variable) # advance to after this variable code = (code + ">") * get_variable_size(variable) # advance to after this variable
return code return code
elif self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=": elif self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=":
# array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON # array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON
@ -79,25 +89,34 @@ class Compiler:
self.parser.advance_token() # skip SEMICOLON self.parser.advance_token() # skip SEMICOLON
array_dimensions = get_variable_dimensions(variable) array_dimensions = get_variable_dimensions(variable)
unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(ID_token, array_dimensions, literal_tokens_list) unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
ID_token, array_dimensions, literal_tokens_list
)
for literal in unpacked_literals_list: for literal in unpacked_literals_list:
code += get_literal_token_code(literal) # evaluate this literal and point to next array element code += get_literal_token_code(literal) # evaluate this literal and point to next array element
return code return code
else: else:
raise BFSyntaxError("Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)" % self.parser.current_token()) raise BFSyntaxError(
"Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)"
% self.parser.current_token()
)
elif self.parser.current_token().type == Token.SEMICOLON: # no need to initialize elif self.parser.current_token().type == Token.SEMICOLON: # no need to initialize
self.parser.advance_token() # skip SEMICOLON self.parser.advance_token() # skip SEMICOLON
code += '>' # advance to after this variable code += ">" # advance to after this variable
else: else:
self.parser.check_current_token_is(Token.ASSIGN) self.parser.check_current_token_is(Token.ASSIGN)
if self.parser.current_token().data != "=": if self.parser.current_token().data != "=":
raise BFSyntaxError("Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token()) raise BFSyntaxError(
"Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token()
)
self.parser.advance_token() # skip ASSIGN self.parser.advance_token() # skip ASSIGN
if not is_token_literal(self.parser.current_token()): if not is_token_literal(self.parser.current_token()):
raise BFSemanticError("Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token())) raise BFSemanticError(
"Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token())
)
code += get_literal_token_code(self.parser.current_token()) code += get_literal_token_code(self.parser.current_token())
@ -113,7 +132,7 @@ class Compiler:
When encountering global variable definition - create Variable object When encountering global variable definition - create Variable object
Returns code that initializes global variables and advances the pointer to after them Returns code that initializes global variables and advances the pointer to after them
""" """
code = '' code = ""
token = self.parser.current_token() token = self.parser.current_token()
while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]: while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]:
if token.type == Token.SEMICOLON: # can have random semicolons ;) if token.type == Token.SEMICOLON: # can have random semicolons ;)
@ -125,22 +144,31 @@ class Compiler:
if self.parser.next_token(next_amount=2).type == Token.LPAREN: if self.parser.next_token(next_amount=2).type == Token.LPAREN:
function = self.create_function_object() function = self.create_function_object()
insert_function_object(function) insert_function_object(function)
elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [Token.SEMICOLON, Token.ASSIGN, Token.LBRACK]: elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [
Token.SEMICOLON,
Token.ASSIGN,
Token.LBRACK,
]:
code += self.compile_global_variable_definition() code += self.compile_global_variable_definition()
else: else:
raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)" % (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token()))) raise BFSyntaxError(
"Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)"
% (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token()))
)
token = self.parser.current_token() token = self.parser.current_token()
if self.parser.current_token() is not None: # we have not reached the last token if self.parser.current_token() is not None: # we have not reached the last token
untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index:]] untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index :]]
raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens) raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens)
return code return code
def compile(self): def compile(self):
insert_library_functions() insert_library_functions()
code = self.process_global_definitions() # code that initializes global variables and advances pointer to after them code = (
self.process_global_definitions()
) # code that initializes global variables and advances pointer to after them
check_function_exists(Token(Token.ID, 0, 0, "main"), 0) check_function_exists(Token(Token.ID, 0, 0, "main"), 0)
code += get_function_object("main").get_code(get_global_variables_size()) code += get_function_object("main").get_code(get_global_variables_size())
@ -159,7 +187,7 @@ def compile(code, optimize_code=False):
return brainfuck_code return brainfuck_code
if __name__ == '__main__': if __name__ == "__main__":
print("This file cannot be directly run") print("This file cannot be directly run")
print("Please import it and use the 'compile' function") print("Please import it and use the 'compile' function")
print("Which receives a C-like code (string) and returns Brainfuck code (string)") print("Which receives a C-like code (string) and returns Brainfuck code (string)")

View file

@ -1,11 +1,28 @@
from collections import namedtuple from collections import namedtuple
from functools import reduce from functools import reduce
from .Exceptions import BFSyntaxError, BFSemanticError
from .Exceptions import BFSemanticError, BFSyntaxError
from .Functions import check_function_exists, get_function_object from .Functions import check_function_exists, get_function_object
from .General import get_variable_dimensions_from_token, get_move_to_return_value_cell_code, get_print_string_code, get_variable_from_ID_token from .General import (
from .General import get_literal_token_value, process_switch_cases, is_token_literal get_literal_token_value,
get_move_to_return_value_cell_code,
get_print_string_code,
get_variable_dimensions_from_token,
get_variable_from_ID_token,
is_token_literal,
process_switch_cases,
)
from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array
from .Node import NodeToken, NodeTernary, NodeArraySetElement, NodeUnaryPrefix, NodeUnaryPostfix, NodeArrayGetElement, NodeFunctionCall, NodeArrayAssignment from .Node import (
NodeArrayAssignment,
NodeArrayGetElement,
NodeArraySetElement,
NodeFunctionCall,
NodeTernary,
NodeToken,
NodeUnaryPostfix,
NodeUnaryPrefix,
)
from .Parser import Parser from .Parser import Parser
from .Token import Token from .Token import Token
@ -83,7 +100,9 @@ class FunctionCompiler:
# new stack pointer should be at least that size # new stack pointer should be at least that size
assert self.current_stack_pointer() <= current_stack_pointer assert self.current_stack_pointer() <= current_stack_pointer
self.return_value_cell = current_stack_pointer self.return_value_cell = current_stack_pointer
self.set_stack_pointer(current_stack_pointer+1) # make room for return_value cell. next available cell is the next one after it. self.set_stack_pointer(
current_stack_pointer + 1
) # make room for return_value cell. next available cell is the next one after it.
function_code = self.compile_function_scope(self.parameters) function_code = self.compile_function_scope(self.parameters)
self.remove_ids_map() # Global variables self.remove_ids_map() # Global variables
return function_code return function_code
@ -123,8 +142,12 @@ class FunctionCompiler:
# multiply by next dimensions sizes # multiply by next dimensions sizes
multiply_amount = reduce(lambda x, y: x * y, dimensions[1:]) # size of the following dimensions multiply_amount = reduce(lambda x, y: x * y, dimensions[1:]) # size of the following dimensions
node_token_multiply_amount = NodeToken(self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))) node_token_multiply_amount = NodeToken(
index_expression = NodeToken(self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount) self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))
)
index_expression = NodeToken(
self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount
)
# handle next dimensions # handle next dimensions
dimension = 1 dimension = 1
@ -132,8 +155,10 @@ class FunctionCompiler:
if self.parser.current_token().type != Token.LBRACK: # too few indexes given... if self.parser.current_token().type != Token.LBRACK: # too few indexes given...
if dimension == 1: if dimension == 1:
return first_index_expression # allow use of only one dimension for multi-dimensional array return first_index_expression # allow use of only one dimension for multi-dimensional array
raise BFSemanticError("%s is a %s-dimensional array, but only %s dimension(s) given as index" % raise BFSemanticError(
(str(ID_token), len(dimensions), dimension)) "%s is a %s-dimensional array, but only %s dimension(s) given as index"
% (str(ID_token), len(dimensions), dimension)
)
self.parser.check_current_token_is(Token.LBRACK) self.parser.check_current_token_is(Token.LBRACK)
self.parser.advance_token() # skip LBRACK self.parser.advance_token() # skip LBRACK
exp = self.expression() exp = self.expression()
@ -143,19 +168,30 @@ class FunctionCompiler:
# current_dimension_index *= size_of_following_dimensions # current_dimension_index *= size_of_following_dimensions
if dimension + 1 < len(dimensions): # not last dimension - need to multiply and add if dimension + 1 < len(dimensions): # not last dimension - need to multiply and add
multiply_amount = reduce(lambda x, y: x * y, dimensions[dimension + 1:]) # size of the following dimensions multiply_amount = reduce(
node_token_multiply_amount = NodeToken(self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))) lambda x, y: x * y, dimensions[dimension + 1 :]
multiply_node = NodeToken(self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount) ) # size of the following dimensions
node_token_multiply_amount = NodeToken(
self.ids_map_list,
token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)),
)
multiply_node = NodeToken(
self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount
)
# prev_dimensions_index += current_dimension_index # prev_dimensions_index += current_dimension_index
index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=multiply_node) index_expression = NodeToken(
self.ids_map_list, token=add_token, left=index_expression, right=multiply_node
)
else: # last dimension - no need to multiply, just add else: # last dimension - no need to multiply, just add
index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp) index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp)
dimension += 1 dimension += 1
if self.parser.current_token().type == Token.LBRACK: # too many indexes given... if self.parser.current_token().type == Token.LBRACK: # too many indexes given...
raise BFSemanticError("%s is a %s-dimensional array. Unexpected %s" % raise BFSemanticError(
(str(ID_token), len(dimensions), self.parser.current_token())) "%s is a %s-dimensional array. Unexpected %s"
% (str(ID_token), len(dimensions), self.parser.current_token())
)
return index_expression return index_expression
def get_token_after_array_access(self, offset=0): def get_token_after_array_access(self, offset=0):
@ -193,12 +229,18 @@ class FunctionCompiler:
if self.parser.next_token().type == Token.SEMICOLON: # INT ID SEMICOLON if self.parser.next_token().type == Token.SEMICOLON: # INT ID SEMICOLON
self.parser.advance_token(2) # skip ID SEMICOLON self.parser.advance_token(2) # skip ID SEMICOLON
return '' # no code is generated here. code was generated for defining this variable when we entered the scope return (
"" # no code is generated here. code was generated for defining this variable when we entered the scope
)
elif self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "=": # INT ID = EXPRESSION SEMICOLON elif (
self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "="
): # INT ID = EXPRESSION SEMICOLON
return self.compile_expression_as_statement() # compile_expression_as_statement skips the SEMICOLON return self.compile_expression_as_statement() # compile_expression_as_statement skips the SEMICOLON
elif self.parser.next_token().type == Token.LBRACK: # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON elif (
self.parser.next_token().type == Token.LBRACK
): # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
# array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};) # array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};)
token_id = self.parser.current_token() token_id = self.parser.current_token()
self.parser.advance_token() # skip ID self.parser.advance_token() # skip ID
@ -210,7 +252,7 @@ class FunctionCompiler:
initialization_node = self.compile_array_assignment(token_id) initialization_node = self.compile_array_assignment(token_id)
code = initialization_node.get_code(self.current_stack_pointer()) + "<" # discard expression value code = initialization_node.get_code(self.current_stack_pointer()) + "<" # discard expression value
else: else:
code = '' # just array definition code = "" # just array definition
# no code is generated here. code was generated for defining this variable when we entered the scope # no code is generated here. code was generated for defining this variable when we entered the scope
self.parser.check_current_token_is(Token.SEMICOLON) self.parser.check_current_token_is(Token.SEMICOLON)
self.parser.advance_token() # skip SEMICOLON self.parser.advance_token() # skip SEMICOLON
@ -297,7 +339,9 @@ class FunctionCompiler:
token = self.tokens[i] token = self.tokens[i]
if token.type == Token.INT: if token.type == Token.INT:
if self.tokens[i-2].type != Token.FOR: # if it is not a definition inside a FOR statement (for (int i = 0...)) if (
self.tokens[i - 2].type != Token.FOR
): # if it is not a definition inside a FOR statement (for (int i = 0...))
variable = create_variable_from_definition(self.parser, index=i) variable = create_variable_from_definition(self.parser, index=i)
self.insert_to_ids_map(variable) self.insert_to_ids_map(variable)
@ -333,7 +377,7 @@ class FunctionCompiler:
for parameter in parameters: for parameter in parameters:
self.insert_to_ids_map(parameter) self.insert_to_ids_map(parameter)
code = '>' # skip return_value_cell code = ">" # skip return_value_cell
code += self.insert_scope_variables_into_ids_map() code += self.insert_scope_variables_into_ids_map()
# this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables # this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables
@ -377,7 +421,9 @@ class FunctionCompiler:
if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN: if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN:
return self.function_call() return self.function_call()
if token.type == Token.ID and self.parser.next_token().type == Token.LBRACK: # array - ID(LBRACK expression RBRACK)+ if (
token.type == Token.ID and self.parser.next_token().type == Token.LBRACK
): # array - ID(LBRACK expression RBRACK)+
index_expression = self.get_array_index_expression() index_expression = self.get_array_index_expression()
return NodeArrayGetElement(self.ids_map_list, token, index_expression) return NodeArrayGetElement(self.ids_map_list, token, index_expression)
@ -386,7 +432,10 @@ class FunctionCompiler:
return NodeToken(self.ids_map_list, token=token) return NodeToken(self.ids_map_list, token=token)
if token.type != Token.LPAREN: if token.type != Token.LPAREN:
raise BFSyntaxError("Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))" % str(token)) raise BFSyntaxError(
"Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))"
% str(token)
)
# ( expression ) # ( expression )
self.parser.check_current_token_is(Token.LPAREN) self.parser.check_current_token_is(Token.LPAREN)
@ -417,7 +466,9 @@ class FunctionCompiler:
if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]: if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
if token.type == Token.BINOP and token.data not in ["+", "-"]: if token.type == Token.BINOP and token.data not in ["+", "-"]:
raise BFSyntaxError("Expected either + or - as unary prefix instead of token %s" % self.parser.current_token()) raise BFSyntaxError(
"Expected either + or - as unary prefix instead of token %s" % self.parser.current_token()
)
self.parser.advance_token() self.parser.advance_token()
unary_prefix = self.unary_prefix() unary_prefix = self.unary_prefix()
@ -618,11 +669,19 @@ class FunctionCompiler:
expression_node = self.expression() expression_node = self.expression()
new_node = NodeToken(self.ids_map_list, left=NodeToken(self.ids_map_list, token=id_token), token=assign_token, right=expression_node) new_node = NodeToken(
self.ids_map_list,
left=NodeToken(self.ids_map_list, token=id_token),
token=assign_token,
right=expression_node,
)
return new_node return new_node
elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.LBRACK and \ elif (
self.get_token_after_array_access().type == Token.ASSIGN: self.parser.current_token().type == Token.ID
and self.parser.next_token().type == Token.LBRACK
and self.get_token_after_array_access().type == Token.ASSIGN
):
# ID (LBRACK expression RBRACK)+ ASSIGN value_expression # ID (LBRACK expression RBRACK)+ ASSIGN value_expression
id_token = self.parser.current_token() id_token = self.parser.current_token()
index_expression = self.get_array_index_expression() index_expression = self.get_array_index_expression()
@ -744,7 +803,7 @@ class FunctionCompiler:
if self.parser.current_token().type == Token.SEMICOLON: if self.parser.current_token().type == Token.SEMICOLON:
# return; # return;
self.parser.advance_token() # skip ; self.parser.advance_token() # skip ;
return '' # nothing to do return "" # nothing to do
# return exp; # return exp;
expression_code = self.compile_expression() expression_code = self.compile_expression()
@ -763,7 +822,12 @@ class FunctionCompiler:
# this expression can be used as a statement. # this expression can be used as a statement.
# e.g: x+=5; or x++ or ++x; # e.g: x+=5; or x++ or ++x;
assert self.parser.current_token().type in [Token.ID, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE] assert self.parser.current_token().type in [
Token.ID,
Token.INCREMENT,
Token.DECREMENT,
Token.UNARY_MULTIPLICATIVE,
]
code = self.compile_expression() code = self.compile_expression()
self.parser.check_current_token_is(Token.SEMICOLON) self.parser.check_current_token_is(Token.SEMICOLON)
@ -901,7 +965,10 @@ class FunctionCompiler:
self.increase_stack_pointer() # use 1 additional temp cell for indicating we need to execute a case self.increase_stack_pointer() # use 1 additional temp cell for indicating we need to execute a case
cases = list() # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool)) cases = list() # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool))
while self.parser.current_token().type in [Token.CASE, Token.DEFAULT]: # (default | CASE literal) COLON statement* break;? statements* while self.parser.current_token().type in [
Token.CASE,
Token.DEFAULT,
]: # (default | CASE literal) COLON statement* break;? statements*
if self.parser.current_token().type == Token.CASE: if self.parser.current_token().type == Token.CASE:
self.parser.advance_token() # skip CASE self.parser.advance_token() # skip CASE
constant_value_token = self.parser.current_token() constant_value_token = self.parser.current_token()
@ -922,7 +989,9 @@ class FunctionCompiler:
inner_case_code = "" inner_case_code = ""
while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]: while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]:
inner_case_code += self.compile_statement(allow_declaration=False) # not allowed to declare variables directly inside case inner_case_code += self.compile_statement(
allow_declaration=False
) # not allowed to declare variables directly inside case
has_break = False has_break = False
if self.parser.current_token().type == Token.BREAK: # ignore all statements after break if self.parser.current_token().type == Token.BREAK: # ignore all statements after break
@ -934,7 +1003,9 @@ class FunctionCompiler:
cases.append((value, inner_case_code, has_break)) cases.append((value, inner_case_code, has_break))
if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]: if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
raise BFSyntaxError("Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token()) raise BFSyntaxError(
"Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token()
)
self.parser.check_current_token_is(Token.RBRACE) self.parser.check_current_token_is(Token.RBRACE)
self.parser.advance_token() self.parser.advance_token()
self.decrease_stack_pointer(amount=2) self.decrease_stack_pointer(amount=2)
@ -943,7 +1014,10 @@ class FunctionCompiler:
def compile_break(self): def compile_break(self):
# TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while # TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while
raise NotImplementedError("Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s" % self.parser.current_token()) raise NotImplementedError(
"Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s"
% self.parser.current_token()
)
def compile_for(self): def compile_for(self):
# for (statement expression; expression) inner_scope_code note: statement contains ;, and inner_scope_code can be scope { } # for (statement expression; expression) inner_scope_code note: statement contains ;, and inner_scope_code can be scope { }
@ -951,17 +1025,17 @@ class FunctionCompiler:
# (the statement cannot contain scope - { and } ) # (the statement cannot contain scope - { and } )
""" """
<for> is a special case of scope <for> is a special case of scope
the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
so we manually compile the scope instead of using self.compile_scope(): so we manually compile the scope instead of using self.compile_scope():
we first create an ids map, and in the case that there is a variable definition inside the <for> definition: we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
(this needs to be done before the <for> definition's statement) (this needs to be done before the <for> definition's statement)
next, inside the for's scope {}: next, inside the for's scope {}:
after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
""" """
self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN]) self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN])
@ -969,7 +1043,7 @@ class FunctionCompiler:
manually_inserted_variable_in_for_definition = False manually_inserted_variable_in_for_definition = False
variable = None variable = None
code = '' code = ""
# =============== enter FOR scope =============== # =============== enter FOR scope ===============
self.add_ids_map() self.add_ids_map()
@ -987,7 +1061,10 @@ class FunctionCompiler:
show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN
if show_side_effect_warning: if show_side_effect_warning:
print("[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects" % self.parser.next_token()) print(
"[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects"
% self.parser.next_token()
)
if self.parser.current_token().type == Token.LBRACE: # statement is a scope if self.parser.current_token().type == Token.LBRACE: # statement is a scope
raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token()) raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token())
@ -1042,20 +1119,31 @@ class FunctionCompiler:
token = self.parser.current_token() token = self.parser.current_token()
if token.type == Token.INT: # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON if token.type == Token.INT: # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON
if not allow_declaration: if not allow_declaration:
raise BFSemanticError("Cannot define variable (%s) directly inside case. " raise BFSemanticError(
"Can define inside new scope {} or outside the switch statement" % token) "Cannot define variable (%s) directly inside case. "
"Can define inside new scope {} or outside the switch statement" % token
)
return self.compile_variable_declaration() return self.compile_variable_declaration()
elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: # ++ID; elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: # ++ID;
return self.compile_expression_as_statement() return self.compile_expression_as_statement()
elif token.type == Token.ID: elif token.type == Token.ID:
if self.parser.next_token().type in [Token.ASSIGN, Token.LBRACK, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: if self.parser.next_token().type in [
Token.ASSIGN,
Token.LBRACK,
Token.INCREMENT,
Token.DECREMENT,
Token.UNARY_MULTIPLICATIVE,
]:
# ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++; # ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++;
return self.compile_expression_as_statement() return self.compile_expression_as_statement()
elif self.parser.next_token().type == Token.LPAREN: # ID(...); (function call) elif self.parser.next_token().type == Token.LPAREN: # ID(...); (function call)
return self.compile_function_call_statement() return self.compile_function_call_statement()
raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)" % (str(self.parser.next_token()), str(token))) raise BFSyntaxError(
"Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)"
% (str(self.parser.next_token()), str(token))
)
elif token.type == Token.PRINT: elif token.type == Token.PRINT:
return self.compile_print_string() return self.compile_print_string()
@ -1097,7 +1185,7 @@ class FunctionCompiler:
def compile_scope_statements(self): def compile_scope_statements(self):
tokens = self.tokens tokens = self.tokens
code = '' code = ""
while self.parser.current_token() is not None: while self.parser.current_token() is not None:
if self.parser.current_token().type == Token.RBRACE: if self.parser.current_token().type == Token.RBRACE:
# we reached the end of our scope # we reached the end of our scope
@ -1124,29 +1212,29 @@ class FunctionCompiler:
# will be inserted into the new scope prior to the scope's compilation # will be inserted into the new scope prior to the scope's compilation
""" """
example layout: example layout:
int global_var1; int global_var1;
int global_var2; int global_var2;
int foo(int a, int b) { int foo(int a, int b) {
int x; int x;
int y; int y;
return 5; return 5;
} }
int main() { int main() {
int n; int n;
foo(1, 2); foo(1, 2);
} }
global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
calling convention: calling convention:
caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell
callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters) callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters)
can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1) can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1)
can assume that the next cells match your parameters can assume that the next cells match your parameters
assumes that initially, the pointer points to the first cell (return_value_cell). assumes that initially, the pointer points to the first cell (return_value_cell).
therefore begin with '>' * (1 + parameters + scope variables) therefore begin with '>' * (1 + parameters + scope variables)
""" """
assert self.parser.current_token().type == Token.LBRACE assert self.parser.current_token().type == Token.LBRACE

View file

@ -1,4 +1,5 @@
from copy import deepcopy from copy import deepcopy
from .Exceptions import BFSemanticError from .Exceptions import BFSemanticError
functions = dict() # Global dictionary of function_name --> FunctionCompiler objects functions = dict() # Global dictionary of function_name --> FunctionCompiler objects
@ -30,4 +31,7 @@ def check_function_exists(function_token, parameters_amount):
function = functions[function_name] function = functions[function_name]
if len(function.parameters) != parameters_amount: if len(function.parameters) != parameters_amount:
raise BFSemanticError("Function '%s' has %s parameters (called it with %s parameters)" % (str(function_token), len(function.parameters), parameters_amount)) raise BFSemanticError(
"Function '%s' has %s parameters (called it with %s parameters)"
% (str(function_token), len(function.parameters), parameters_amount)
)

View file

@ -1,7 +1,8 @@
from .Exceptions import BFSyntaxError, BFSemanticError
from .Token import Token
from functools import reduce from functools import reduce
from .Exceptions import BFSemanticError, BFSyntaxError
from .Token import Token
""" """
This file holds functions that generate general Brainfuck code This file holds functions that generate general Brainfuck code
And general functions that are not dependent on other objects And general functions that are not dependent on other objects
@ -126,23 +127,29 @@ def unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_d
if len(array_dimensions) == 0: if len(array_dimensions) == 0:
raise BFSemanticError("Tried to initialize array %s with too many nested sub-arrays" % ID_token) raise BFSemanticError("Tried to initialize array %s with too many nested sub-arrays" % ID_token)
if len(literal_tokens_list) > array_dimensions[0]: if len(literal_tokens_list) > array_dimensions[0]:
raise BFSemanticError("Tried to initialize array %s dimension %s with too many elements (%s)" raise BFSemanticError(
% (ID_token, str(array_dimensions), str(len(literal_tokens_list)))) "Tried to initialize array %s dimension %s with too many elements (%s)"
% (ID_token, str(array_dimensions), str(len(literal_tokens_list)))
)
result = [] result = []
for element in literal_tokens_list: for element in literal_tokens_list:
if isinstance(element, list): if isinstance(element, list):
# recursively unpack the list with the sub-dimension of the sub-array # recursively unpack the list with the sub-dimension of the sub-array
# E.g if we have arr[3][3][3] and then this call will fill [3][3]=9 elements # E.g if we have arr[3][3][3] and then this call will fill [3][3]=9 elements
result.extend(unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions[1:], element)) result.extend(
unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions[1:], element)
)
else: else:
result.append(element) result.append(element)
if len(array_dimensions) > 1: if len(array_dimensions) > 1:
dimension_size = dimensions_to_size(array_dimensions[1:]) # current size we need to fill dimension_size = dimensions_to_size(array_dimensions[1:]) # current size we need to fill
result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size - 1)) # fill missing elements in this dimension with zeros result.extend(
[Token(Token.NUM, 0, 0, "0")] * (dimension_size - 1)
) # fill missing elements in this dimension with zeros
dimension_size = dimensions_to_size(array_dimensions) # current size we need to fill dimension_size = dimensions_to_size(array_dimensions) # current size we need to fill
result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size-len(result))) # fill the result with zeros result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size - len(result))) # fill the result with zeros
return result return result
@ -157,13 +164,20 @@ def unpack_literal_tokens_to_array_dimensions(ID_token, array_dimensions, litera
if all(not isinstance(element, list) for element in literal_tokens_list): if all(not isinstance(element, list) for element in literal_tokens_list):
# special case - if all elements are literals, then we allow assigning them as-is and not care about dimensions # special case - if all elements are literals, then we allow assigning them as-is and not care about dimensions
# E.g if we have arr[3][3][3] = {1,2,3,4} then return [1,2,3,4,0,0,0,0,0] # E.g if we have arr[3][3][3] = {1,2,3,4} then return [1,2,3,4,0,0,0,0,0]
unpacked_literals_list = literal_tokens_list + [Token(Token.NUM, 0, 0, "0")] * (array_size - len(literal_tokens_list)) # fill missing with zeros unpacked_literals_list = literal_tokens_list + [Token(Token.NUM, 0, 0, "0")] * (
array_size - len(literal_tokens_list)
) # fill missing with zeros
else: else:
unpacked_literals_list = unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions, literal_tokens_list) unpacked_literals_list = unpack_multidimensional_literal_tokens_to_array_dimensions(
ID_token, array_dimensions, literal_tokens_list
)
if len(unpacked_literals_list) > array_size: if len(unpacked_literals_list) > array_size:
raise BFSemanticError("Tried to initialize array %s with incompatible amount of literals." raise BFSemanticError(
" (array size is %s and literals size is %s)" % (ID_token, str(array_size), str(len(unpacked_literals_list)))) "Tried to initialize array %s with incompatible amount of literals."
" (array size is %s and literals size is %s)"
% (ID_token, str(array_size), str(len(unpacked_literals_list)))
)
assert len(unpacked_literals_list) == array_size assert len(unpacked_literals_list) == array_size
return unpacked_literals_list return unpacked_literals_list
@ -208,17 +222,19 @@ def process_switch_cases(expression_code, cases):
code += "<" # point to expression code += "<" # point to expression
if all_cases_have_break: # small optimization for evaluating the expression if all_cases_have_break: # small optimization for evaluating the expression
cases = [case for case in cases if case[0] != "default"] # remove default to be able to sort. it is handled differently cases = [
case for case in cases if case[0] != "default"
] # remove default to be able to sort. it is handled differently
cases.sort(key=lambda x: x[0], reverse=True) # Can sort since correct flow is not needed cases.sort(key=lambda x: x[0], reverse=True) # Can sort since correct flow is not needed
""" """
This loop compares the expression value to each case in the switch-case statement, in reverse order This loop compares the expression value to each case in the switch-case statement, in reverse order
It does so by increasing and decreasing expression, and comparing result to 0 It does so by increasing and decreasing expression, and comparing result to 0
E.G. if we have E.G. if we have
switch(x) { switch(x) {
case 2: case 2:
case 0: case 0:
case 5: case 5:
case 1: case 1:
} }
x will be put in <expression> cell, then: x will be put in <expression> cell, then:
@ -244,7 +260,7 @@ def process_switch_cases(expression_code, cases):
<need_to_execute=1> <need_to_execute=1>
<compare_with_1> [ <compare_with_1> [
<compare_with_5> [ <compare_with_5> [
<compare_with_0> [ <compare_with_0> [
<compare_with_2> [ <compare_with_2> [
<default_code> <expression_value=0> <need_to_execute=0> <default_code> <expression_value=0> <need_to_execute=0>
] <if need_to_execute> <code_for_2> <need_to_execute=0> ] <if need_to_execute> <code_for_2> <need_to_execute=0>
@ -487,22 +503,22 @@ def get_bitwise_code(code_logic):
code += "<<" # point to a code += "<<" # point to a
code += "[" # while a != 0: code += "[" # while a != 0:
code += "-" # a -= 1 code += "-" # a -= 1
code += ">>-" # c -= 1 code += ">>-" # c -= 1
code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w) code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w)
code += ">>" # point to y code += ">>" # point to y
code += ">>+<<" # bit1 += 1 code += ">>+<<" # bit1 += 1
code += "-[" # if y != 1: code += "-[" # if y != 1:
code += "<+" # x += 1 code += "<+" # x += 1
code += "<<++" # c += 2 (c was 0) code += "<<++" # c += 2 (c was 0)
code += ">" * 5 # point to bit1 code += ">" * 5 # point to bit1
code += "--" # bit1 -= 2 (bit1 was 2) code += "--" # bit1 -= 2 (bit1 was 2)
code += "<<" # point to y code += "<<" # point to y
code += "+" # set y to 0 code += "+" # set y to 0
code += "]" # end if code += "]" # end if
code += "<<<<<" # point to a code += "<<<<<" # point to a
code += "]" # end while code += "]" # end while
code += ">>>>[<<<<+>>>>-]" # move x to a (x is a/2) code += ">>>>[<<<<+>>>>-]" # move x to a (x is a/2)
@ -510,21 +526,21 @@ def get_bitwise_code(code_logic):
code += "<" # point to b code += "<" # point to b
code += "[" # while b != 0: code += "[" # while b != 0:
code += "-" # b -= 1 code += "-" # b -= 1
code += ">-" # c -= 1 code += ">-" # c -= 1
code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w) code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w)
code += ">>" # point to y code += ">>" # point to y
code += ">+<" # z += 1 code += ">+<" # z += 1
code += "-[" # if y != 1: code += "-[" # if y != 1:
code += ">--<" # z -= 2 (z was 2) code += ">--<" # z -= 2 (z was 2)
code += "<+" # x += 1 code += "<+" # x += 1
code += "<<++" # c += 2 (c was 0) code += "<<++" # c += 2 (c was 0)
code += ">>>" # point to y code += ">>>" # point to y
code += "+" # set y to 0 code += "+" # set y to 0
code += "]" code += "]"
code += "<<<<" # point to b code += "<<<<" # point to b
code += "]" # end while code += "]" # end while
# w is a % 2 # w is a % 2
@ -658,14 +674,14 @@ def get_unary_prefix_op_code(token, offset_to_variable=None):
assert token.data in ["+", "-"] assert token.data in ["+", "-"]
if token.data == "+": if token.data == "+":
# keep value as-is # keep value as-is
return '>' return ">"
elif token.data == "-": elif token.data == "-":
# a temp # a temp
code = ">[-]" # zero temp code = ">[-]" # zero temp
code += "<" # point to a code += "<" # point to a
code += "[->-<]" # sub a from temp code += "[->-<]" # sub a from temp
code += ">" # point to temp code += ">" # point to temp
code += "[<+>-]" # copy temp to a code += "[<+>-]" # copy temp to a
return code return code
raise NotImplementedError raise NotImplementedError
@ -1127,7 +1143,6 @@ def get_op_boolean_operator_code(node, current_pointer):
raise NotImplementedError raise NotImplementedError
def get_print_string_code(string): def get_print_string_code(string):
code = "[-]" # zero the current cell code = "[-]" # zero the current cell
code += ">[-]" # zero the next cell (will be used for loop counts) code += ">[-]" # zero the next cell (will be used for loop counts)
@ -1200,6 +1215,7 @@ def get_move_left_index_cell_code():
# General # General
# ================= # =================
def get_literal_token_value(token): def get_literal_token_value(token):
# known at compilation time # known at compilation time
assert is_token_literal(token) assert is_token_literal(token)

View file

@ -1,6 +1,7 @@
from collections import namedtuple from collections import namedtuple
from .Token import Token
from .General import dimensions_to_size, get_NUM_token_value from .General import dimensions_to_size, get_NUM_token_value
from .Token import Token
""" """
This file holds the program's functions and global variables This file holds the program's functions and global variables
@ -55,7 +56,7 @@ def create_variable_from_definition(parser, index=None, advance_tokens=False):
if index is None, then assumes we start at the current_token_index if index is None, then assumes we start at the current_token_index
if advance_tokens is True, then modifies current_token_index accordingly using parser.advance_token() if advance_tokens is True, then modifies current_token_index accordingly using parser.advance_token()
""" """
if index is None: if index is None:
index = parser.current_token_index index = parser.current_token_index

View file

@ -1,6 +1,7 @@
import re import re
from .Token import Token
from .Optimizer import optimize from .Optimizer import optimize
from .Token import Token
class LexicalErrorException(Exception): class LexicalErrorException(Exception):
@ -14,64 +15,59 @@ def analyze(text):
""" """
rules = [ rules = [
('\s+', Token.WHITESPACE), (r"\s+", Token.WHITESPACE),
('void', Token.VOID), ("void", Token.VOID),
('int', Token.INT), ("int", Token.INT),
('bool', Token.INT), # treat bool as int ("bool", Token.INT), # treat bool as int
('char', Token.INT), # treat char as int ("char", Token.INT), # treat char as int
("true", Token.TRUE),
('true', Token.TRUE), ("false", Token.FALSE),
('false', Token.FALSE), ("&&", Token.AND),
('&&', Token.AND), (r"\|\|", Token.OR),
('\|\|', Token.OR), (r"\!", Token.NOT),
('\!', Token.NOT), ("return", Token.RETURN),
('return', Token.RETURN), ("if", Token.IF),
('if', Token.IF), ("else", Token.ELSE),
('else', Token.ELSE), ("while", Token.WHILE),
('while', Token.WHILE), ("for", Token.FOR),
('for', Token.FOR), ("do", Token.DO),
('do', Token.DO), ("print", Token.PRINT),
('print', Token.PRINT), ("switch", Token.SWITCH),
('switch', Token.SWITCH), ("case", Token.CASE),
('case', Token.CASE), ("default", Token.DEFAULT),
('default', Token.DEFAULT), ("break", Token.BREAK),
('break', Token.BREAK), ("continue", Token.CONTINUE), # todo
('continue', Token.CONTINUE), # todo (":", Token.COLON),
(':', Token.COLON), (";", Token.SEMICOLON),
(';', Token.SEMICOLON), (",", Token.COMMA),
(',', Token.COMMA), (r"\(", Token.LPAREN),
(r"\)", Token.RPAREN),
('\(', Token.LPAREN), (r"\{", Token.LBRACE),
('\)', Token.RPAREN), (r"\}", Token.RBRACE),
('\{', Token.LBRACE), (r"\[", Token.LBRACK),
('\}', Token.RBRACE), (r"\]", Token.RBRACK),
('\[', Token.LBRACK), (r"=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=", Token.ASSIGN),
('\]', Token.RBRACK), (r"\?", Token.TERNARY),
('=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=', Token.ASSIGN), (r"<=|>=|==|!=|<|>", Token.RELOP),
('\?', Token.TERNARY), (r"\+\+", Token.INCREMENT),
("--", Token.DECREMENT),
('<=|>=|==|!=|<|>', Token.RELOP), (r"\+|-|\*|/|%", Token.BINOP),
('\+\+', Token.INCREMENT), (r"\*\*|//|%%", Token.UNARY_MULTIPLICATIVE),
('--', Token.DECREMENT), ("<<|>>", Token.BITWISE_SHIFT),
('\+|-|\*|/|%', Token.BINOP), ("~", Token.BITWISE_NOT),
('\*\*|//|%%', Token.UNARY_MULTIPLICATIVE), ("&", Token.BITWISE_AND),
(r"\|", Token.BITWISE_OR),
('<<|>>', Token.BITWISE_SHIFT), (r"\^", Token.BITWISE_XOR),
('~', Token.BITWISE_NOT), ("([a-zA-Z_][a-zA-Z0-9_]*)", Token.ID),
('&', Token.BITWISE_AND), (r"(\d+)", Token.NUM),
('\|', Token.BITWISE_OR), (r"(0x[A-Fa-f\d]+)", Token.NUM), # hexadecimal number
('\^', Token.BITWISE_XOR), ("(0o[0-7]+)", Token.NUM), # octal number
("(0b[01]+)", Token.NUM), # binary number
('([a-zA-Z_][a-zA-Z0-9_]*)', Token.ID), (r'\"(\\\"|[^"])*"', Token.STRING),
('(\d+)', Token.NUM), (r"\'(\\\'|(\\)?[^\'])\'", Token.CHAR),
('(0x[A-Fa-f\d]+)', Token.NUM), # hexadecimal number ("//.*(\\n|$)", Token.COMMENT),
('(0o[0-7]+)', Token.NUM), # octal number (r"/\*[\s\S]*?\*/", Token.COMMENT), # multiline comments
('(0b[01]+)', Token.NUM), # binary number (".", Token.UNIDENTIFIED),
(r'\"(\\\"|[^"])*"', Token.STRING),
(r'\'(\\\'|(\\)?[^\'])\'', Token.CHAR),
('//.*(\\n|$)', Token.COMMENT),
(r'/\*[\s\S]*?\*/', Token.COMMENT), # multiline comments
('.', Token.UNIDENTIFIED)
] ]
rules = [(re.compile(r), t) for r, t in rules] rules = [(re.compile(r), t) for r, t in rules]
@ -79,7 +75,7 @@ def analyze(text):
tokens = [] tokens = []
# create a mapping of [line number] to [offset of that line from the beginning of the text] # create a mapping of [line number] to [offset of that line from the beginning of the text]
newline = re.compile('\n') newline = re.compile("\n")
lines = [0] + [m.end() for m in re.finditer(newline, text)] lines = [0] + [m.end() for m in re.finditer(newline, text)]
i = 0 i = 0
@ -99,12 +95,12 @@ def analyze(text):
# calculate line and column # calculate line and column
line, column = None, None line, column = None, None
for line_idx in range(len(lines)-1): for line_idx in range(len(lines) - 1):
if lines[line_idx] <= longest_match.start() < lines[line_idx+1]: if lines[line_idx] <= longest_match.start() < lines[line_idx + 1]:
line, column = line_idx+1, (longest_match.start() - lines[line_idx])+1 # humans count from 1 :) line, column = line_idx + 1, (longest_match.start() - lines[line_idx]) + 1 # humans count from 1 :)
break break
if not line: if not line:
line, column = len(lines), (longest_match.start() - lines[-1])+1 line, column = len(lines), (longest_match.start() - lines[-1]) + 1
if matched_token in [Token.COMMENT, Token.WHITESPACE]: if matched_token in [Token.COMMENT, Token.WHITESPACE]:
pass # do nothing pass # do nothing
@ -112,8 +108,18 @@ def analyze(text):
raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column)) raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column))
elif matched_token in [Token.STRING, Token.CHAR]: elif matched_token in [Token.STRING, Token.CHAR]:
# remove quotes at beginning and end, un-escape characters # remove quotes at beginning and end, un-escape characters
tokens.append(Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape"))) tokens.append(
elif matched_token in [Token.NUM, Token.ID, Token.BINOP, Token.RELOP, Token.ASSIGN, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_SHIFT]: Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape"))
)
elif matched_token in [
Token.NUM,
Token.ID,
Token.BINOP,
Token.RELOP,
Token.ASSIGN,
Token.UNARY_MULTIPLICATIVE,
Token.BITWISE_SHIFT,
]:
tokens.append(Token(matched_token, line, column, longest_match.group())) tokens.append(Token(matched_token, line, column, longest_match.group()))
else: else:
tokens.append(Token(matched_token, line, column)) tokens.append(Token(matched_token, line, column))
@ -128,16 +134,40 @@ def tests():
text = "my international int ; int; pints; international;" text = "my international int ; int; pints; international;"
res = analyze(text) res = analyze(text)
expected = [Token.ID, Token.ID, Token.INT, Token.SEMICOLON, Token.INT, Token.SEMICOLON, Token.ID, expected = [
Token.SEMICOLON, Token.ID, Token.SEMICOLON] Token.ID,
Token.ID,
Token.INT,
Token.SEMICOLON,
Token.INT,
Token.SEMICOLON,
Token.ID,
Token.SEMICOLON,
Token.ID,
Token.SEMICOLON,
]
assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res))) assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
def test2(): def test2():
text = "true !||!false falsek k||y+-a&&x" text = "true !||!false falsek k||y+-a&&x"
res = analyze(text) res = analyze(text)
expected = [Token.TRUE, Token.NOT, Token.OR, Token.NOT, Token.FALSE, Token.ID, Token.ID, Token.OR, Token.ID, expected = [
Token.BINOP, Token.BINOP, Token.ID, Token.AND, Token.ID] Token.TRUE,
Token.NOT,
Token.OR,
Token.NOT,
Token.FALSE,
Token.ID,
Token.ID,
Token.OR,
Token.ID,
Token.BINOP,
Token.BINOP,
Token.ID,
Token.AND,
Token.ID,
]
assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res))) assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
def test3(): def test3():
@ -166,9 +196,29 @@ def tests():
# test all arithmetic operations # test all arithmetic operations
text = "(1+2*3/6)+(1%3)*(6-1)" text = "(1+2*3/6)+(1%3)*(6-1)"
tokens = analyze(text) tokens = analyze(text)
expected = [Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM, expected = [
Token.RPAREN, Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN, Token.LPAREN,
Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN] Token.NUM,
Token.BINOP,
Token.NUM,
Token.BINOP,
Token.NUM,
Token.BINOP,
Token.NUM,
Token.RPAREN,
Token.BINOP,
Token.LPAREN,
Token.NUM,
Token.BINOP,
Token.NUM,
Token.RPAREN,
Token.BINOP,
Token.LPAREN,
Token.NUM,
Token.BINOP,
Token.NUM,
Token.RPAREN,
]
assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens))) assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
optimize(tokens) optimize(tokens)
assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5" assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5"
@ -179,5 +229,5 @@ def tests():
test3() test3()
if __name__ == '__main__': if __name__ == "__main__":
tests() tests()

View file

@ -48,7 +48,9 @@ def get_readint_code():
code += ">" # point to tmp code += ">" # point to tmp
code += "[<++++++++++>-]" # res = tmp * 10, tmp = 0 code += "[<++++++++++>-]" # res = tmp * 10, tmp = 0
code += ">" # point to input code += ">" # point to input
code += "-" * (0x30 - 10) # convert character to a digit by subtracting 0x30 from it (we already subtracted 10 before) code += "-" * (
0x30 - 10
) # convert character to a digit by subtracting 0x30 from it (we already subtracted 10 before)
code += "[<<+>>-]" # res += input code += "[<<+>>-]" # res += input
code += "]" # end if code += "]" # end if

View file

@ -1,10 +1,20 @@
from .Exceptions import BFSemanticError from .Exceptions import BFSemanticError
from .General import get_copy_from_variable_code, get_copy_to_variable_code from .General import (
from .General import get_move_left_index_cell_code, get_move_right_index_cells_code get_copy_from_variable_code,
from .General import get_offset_to_variable, get_variable_dimensions_from_token get_copy_to_variable_code,
from .General import get_op_between_literals_code, get_literal_token_code, get_token_ID_code get_literal_token_code,
from .General import get_unary_prefix_op_code, get_unary_postfix_op_code, is_token_literal get_move_left_index_cell_code,
from .General import unpack_literal_tokens_to_array_dimensions, get_op_boolean_operator_code get_move_right_index_cells_code,
get_offset_to_variable,
get_op_between_literals_code,
get_op_boolean_operator_code,
get_token_ID_code,
get_unary_postfix_op_code,
get_unary_prefix_op_code,
get_variable_dimensions_from_token,
is_token_literal,
unpack_literal_tokens_to_array_dimensions,
)
from .Token import Token from .Token import Token
""" """
@ -60,7 +70,14 @@ class NodeToken(Node):
else: else:
return get_literal_token_code(self.token) return get_literal_token_code(self.token)
elif self.token.type in [Token.BINOP, Token.RELOP, Token.BITWISE_SHIFT, Token.BITWISE_AND, Token.BITWISE_OR, Token.BITWISE_XOR]: elif self.token.type in [
Token.BINOP,
Token.RELOP,
Token.BITWISE_SHIFT,
Token.BITWISE_AND,
Token.BITWISE_OR,
Token.BITWISE_XOR,
]:
code = self.left.get_code(current_pointer) code = self.left.get_code(current_pointer)
code += self.right.get_code(current_pointer + 1) code += self.right.get_code(current_pointer + 1)
code += "<<" # point to the first operand code += "<<" # point to the first operand
@ -78,7 +95,7 @@ class NodeToken(Node):
elif self.token.type == Token.ASSIGN: elif self.token.type == Token.ASSIGN:
assert self.left.token.type == Token.ID assert self.left.token.type == Token.ID
if self.token.data == '=': if self.token.data == "=":
# id = expression # id = expression
code = self.right.get_code(current_pointer) code = self.right.get_code(current_pointer)
@ -119,7 +136,7 @@ class NodeTernary(Node):
code = ">" # point to bool_evaluate_node_false code = ">" # point to bool_evaluate_node_false
code += "[-]+" # bool_evaluate_node_false=1 code += "[-]+" # bool_evaluate_node_false=1
code += ">" # point to condition code += ">" # point to condition
code += self.condition.get_code(current_pointer+2) # evaluate condition code += self.condition.get_code(current_pointer + 2) # evaluate condition
code += "<" # point to condition code += "<" # point to condition
code += "[" # if condition is non-zero code += "[" # if condition is non-zero
@ -150,7 +167,14 @@ class NodeUnaryPrefix(Node):
def get_code(self, current_pointer, *args, **kwargs): def get_code(self, current_pointer, *args, **kwargs):
# unary prefix (!x or ++x or ~x or -x) # unary prefix (!x or ++x or ~x or -x)
assert self.token_operation.type in [Token.NOT, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_NOT, Token.BINOP] assert self.token_operation.type in [
Token.NOT,
Token.INCREMENT,
Token.DECREMENT,
Token.UNARY_MULTIPLICATIVE,
Token.BITWISE_NOT,
Token.BINOP,
]
if self.token_operation.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]: if self.token_operation.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
code = self.node_literal.get_code(current_pointer) code = self.node_literal.get_code(current_pointer)
@ -178,10 +202,15 @@ class NodeUnaryPrefix(Node):
# the token to apply on must be an ID # the token to apply on must be an ID
if isinstance(self.node_literal, NodeToken) is False: if isinstance(self.node_literal, NodeToken) is False:
raise BFSemanticError("Prefix operator %s can only be applied to a variable" % str(self.token_operation)) raise BFSemanticError(
"Prefix operator %s can only be applied to a variable" % str(self.token_operation)
)
if self.node_literal.token.type != Token.ID: if self.node_literal.token.type != Token.ID:
raise BFSemanticError("Prefix operator %s cannot be applied to %s, but only to a variable" % (str(self.token_operation), str(self.node_literal.token))) raise BFSemanticError(
"Prefix operator %s cannot be applied to %s, but only to a variable"
% (str(self.token_operation), str(self.node_literal.token))
)
offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer) offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
return get_unary_prefix_op_code(self.token_operation, offset_to_ID) return get_unary_prefix_op_code(self.token_operation, offset_to_ID)
@ -218,7 +247,10 @@ class NodeUnaryPostfix(Node):
raise BFSemanticError("Postfix operator %s can only be applied to a variable" % str(self.token_operation)) raise BFSemanticError("Postfix operator %s can only be applied to a variable" % str(self.token_operation))
if self.node_literal.token.type != Token.ID: if self.node_literal.token.type != Token.ID:
raise BFSemanticError("Postfix operator %s cannot be applied to %s, but only to a variable" % (str(self.token_operation), str(self.node_literal.token))) raise BFSemanticError(
"Postfix operator %s cannot be applied to %s, but only to a variable"
% (str(self.token_operation), str(self.node_literal.token))
)
offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer) offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
return get_unary_postfix_op_code(self.token_operation, offset_to_ID) return get_unary_postfix_op_code(self.token_operation, offset_to_ID)
@ -227,27 +259,31 @@ class NodeUnaryPostfix(Node):
class NodeFunctionCall(Node): class NodeFunctionCall(Node):
def __init__(self, ids_map_list, function_to_call, parameters): def __init__(self, ids_map_list, function_to_call, parameters):
""" """
receives a FunctionCompiler object receives a FunctionCompiler object
that implements get_code() which gets a stack pointer and returns code that implements get_code() which gets a stack pointer and returns code
receives a list of parameters - Node objects receives a list of parameters - Node objects
each one gets a stack pointer and returns code that evaluates the parameter each one gets a stack pointer and returns code that evaluates the parameter
""" """
Node.__init__(self, ids_map_list) Node.__init__(self, ids_map_list)
self.function_to_call = function_to_call self.function_to_call = function_to_call
self.parameters = parameters self.parameters = parameters
def get_code(self, current_pointer, *args, **kwargs): def get_code(self, current_pointer, *args, **kwargs):
code = '[-]>' # return_value_cell=0 code = "[-]>" # return_value_cell=0
# evaluate parameters from left to right, and put them on the "stack" in that order # evaluate parameters from left to right, and put them on the "stack" in that order
# after each parameter code, the pointer points to the next available cell (one after the parameter) # after each parameter code, the pointer points to the next available cell (one after the parameter)
for i, parameter in enumerate(self.parameters): for i, parameter in enumerate(self.parameters):
code += parameter.get_code(current_pointer+1+i) # evaluate each parameter at its cell offset (starting at one after return_value_cell) code += parameter.get_code(
current_pointer + 1 + i
) # evaluate each parameter at its cell offset (starting at one after return_value_cell)
# at this point we point to one after the last parameter # at this point we point to one after the last parameter
code += "<" * len(self.parameters) # point back to first parameter code += "<" * len(self.parameters) # point back to first parameter
code += "<" # point to return_value_cell code += "<" # point to return_value_cell
code += self.function_to_call.get_code(current_stack_pointer=current_pointer) # after this we point to return value cell code += self.function_to_call.get_code(
current_stack_pointer=current_pointer
) # after this we point to return value cell
code += ">" # point to next available cell (one after return value) code += ">" # point to next available cell (one after return value)
return code return code
@ -377,9 +413,10 @@ class NodeArraySetElement(NodeArrayElement):
class NodeArrayAssignment(Node): class NodeArrayAssignment(Node):
""" """
Used for array assignment Used for array assignment
E.g arr = = { 1, 2, 3... } E.g arr = = { 1, 2, 3... }
""" """
def __init__(self, ids_map_list, token_id, literal_tokens_list): def __init__(self, ids_map_list, token_id, literal_tokens_list):
Node.__init__(self, ids_map_list) Node.__init__(self, ids_map_list)
self.token_id = token_id self.token_id = token_id
@ -387,7 +424,9 @@ class NodeArrayAssignment(Node):
def get_code(self, current_pointer, *args, **kwargs): def get_code(self, current_pointer, *args, **kwargs):
array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id) array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id)
unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(self.token_id, array_dimensions, self.literal_tokens_list) unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
self.token_id, array_dimensions, self.literal_tokens_list
)
offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer) offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer)
code = "<" * offset # point to first array element code = "<" * offset # point to first array element

View file

@ -15,9 +15,13 @@ def optimize_once(tokens):
# optimize arithmetic operations. E.g replace 1+2 with 3 # optimize arithmetic operations. E.g replace 1+2 with 3
# need to be careful not to optimize (1+2*3) to (3*3) # need to be careful not to optimize (1+2*3) to (3*3)
if tokens[start_index+1].data in ["*", "/", "%"] or (start_index+3 >= len(tokens)) or (tokens[start_index+3].data not in ["*", "/", "%"]): if (
num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index+2]) tokens[start_index + 1].data in ["*", "/", "%"]
op = tokens[start_index+1].data or (start_index + 3 >= len(tokens))
or (tokens[start_index + 3].data not in ["*", "/", "%"])
):
num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index + 2])
op = tokens[start_index + 1].data
if op == "+": if op == "+":
val = num1 + num2 val = num1 + num2
elif op == "-": elif op == "-":
@ -38,8 +42,13 @@ def optimize_once(tokens):
raise NotImplementedError(op) raise NotImplementedError(op)
# remove the 3 old tokens and replace them with new one # remove the 3 old tokens and replace them with new one
new_token = Token(Token.NUM, tokens[start_index].line, tokens[start_index].column, data=str(val), new_token = Token(
original_tokens=tokens[start_index:start_index+3]) Token.NUM,
tokens[start_index].line,
tokens[start_index].column,
data=str(val),
original_tokens=tokens[start_index : start_index + 3],
)
for _ in range(3): for _ in range(3):
tokens.pop(start_index) tokens.pop(start_index)
@ -52,16 +61,24 @@ def optimize_once(tokens):
# replace printint(50) with print("50") # replace printint(50) with print("50")
# since printing strings compiles into less Brainfuck code than printing ints # since printing strings compiles into less Brainfuck code than printing ints
if tokens[start_index].data == "printint": if tokens[start_index].data == "printint":
tokens[start_index] = Token(Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]]) tokens[start_index] = Token(
tokens[start_index+2] = Token(Token.STRING, tokens[start_index].line, tokens[start_index].column, Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]]
data=str(tokens[start_index+2].data), original_tokens=[tokens[start_index+2]]) )
tokens[start_index + 2] = Token(
Token.STRING,
tokens[start_index].line,
tokens[start_index].column,
data=str(tokens[start_index + 2].data),
original_tokens=[tokens[start_index + 2]],
)
return True return True
return False return False
rules = [([Token.NUM, Token.BINOP, Token.NUM], optimize_binop), # arithmetic operations rules = [
([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint), # printint(50) to print("50") ([Token.NUM, Token.BINOP, Token.NUM], optimize_binop), # arithmetic operations
] ([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint), # printint(50) to print("50")
]
# try to match one of the rules to the tokens in a "sliding window" style # try to match one of the rules to the tokens in a "sliding window" style
i = 0 i = 0
@ -69,7 +86,7 @@ def optimize_once(tokens):
optimized = False optimized = False
for tokens_sequence, optimization_function in rules: for tokens_sequence, optimization_function in rules:
if i + len(tokens_sequence) <= len(tokens): if i + len(tokens_sequence) <= len(tokens):
if all(tokens_sequence[n] == tokens[i+n].type for n in range(len(tokens_sequence))): if all(tokens_sequence[n] == tokens[i + n].type for n in range(len(tokens_sequence))):
if optimization_function(tokens, i): if optimization_function(tokens, i):
optimized = True optimized = True
if optimized: if optimized:
@ -82,7 +99,7 @@ def optimize(tokens):
prev_tokens = [token.type for token in tokens] prev_tokens = [token.type for token in tokens]
while True: while True:
optimize_once(tokens) optimize_once(tokens)
print(".", end='') print(".", end="")
current_tokens = [token.type for token in tokens] current_tokens = [token.type for token in tokens]
if current_tokens == prev_tokens: if current_tokens == prev_tokens:
break break

View file

@ -1,12 +1,13 @@
from .Exceptions import BFSyntaxError, BFSemanticError from .Exceptions import BFSemanticError, BFSyntaxError
from .Token import Token
from .General import is_token_literal from .General import is_token_literal
from .Token import Token
class Parser: class Parser:
""" """
Used to easily iterate tokens Used to easily iterate tokens
""" """
def __init__(self, tokens): def __init__(self, tokens):
self.tokens = tokens self.tokens = tokens
self.current_token_index = 0 self.current_token_index = 0
@ -80,7 +81,10 @@ class Parser:
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index]))) raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
for i in range(0, len(tokens_list)): for i in range(0, len(tokens_list)):
if self.tokens[starting_index + 1 + i].type != tokens_list[i]: if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index: starting_index+1+i]])) raise BFSyntaxError(
"Expected %s after %s"
% (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index : starting_index + 1 + i]])
)
def check_next_token_is(self, token, starting_index=None): def check_next_token_is(self, token, starting_index=None):
self.check_next_tokens_are([token], starting_index=starting_index) self.check_next_tokens_are([token], starting_index=starting_index)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys
import argparse import argparse
import sys
def create_jumps_dictionary(program): def create_jumps_dictionary(program):
@ -9,9 +9,9 @@ def create_jumps_dictionary(program):
res = dict() res = dict()
for index, command in enumerate(program): for index, command in enumerate(program):
if command == '[': if command == "[":
lbraces.append(index) lbraces.append(index)
elif command == ']': elif command == "]":
if len(lbraces) == 0: if len(lbraces) == 0:
raise SyntaxError("Brainfuck: mismatched parentheses (at index: %s)" % index) raise SyntaxError("Brainfuck: mismatched parentheses (at index: %s)" % index)
@ -35,26 +35,26 @@ def brainfuck(program, bits=8):
while instruction_pointer < len(program): while instruction_pointer < len(program):
command = program[instruction_pointer] command = program[instruction_pointer]
if command == '>': if command == ">":
data_pointer += 1 data_pointer += 1
elif command == '<': elif command == "<":
data_pointer -= 1 data_pointer -= 1
elif command == '+': elif command == "+":
data[data_pointer] = (data.get(data_pointer, 0) + 1) data[data_pointer] = data.get(data_pointer, 0) + 1
if data[data_pointer] == 2 ** bits: if data[data_pointer] == 2**bits:
data[data_pointer] = 0 data[data_pointer] = 0
elif command == '-': elif command == "-":
data[data_pointer] = (data.get(data_pointer, 0) - 1) data[data_pointer] = data.get(data_pointer, 0) - 1
if data[data_pointer] == -1: if data[data_pointer] == -1:
data[data_pointer] = 2 ** bits - 1 data[data_pointer] = 2**bits - 1
elif command == ',': elif command == ",":
data[data_pointer] = ord(sys.stdin.read(1)) % 256 data[data_pointer] = ord(sys.stdin.read(1)) % 256
elif command == '.': elif command == ".":
print(chr(data.get(data_pointer, 0)), end='', flush=True) print(chr(data.get(data_pointer, 0)), end="", flush=True)
elif command == '[': elif command == "[":
if data.get(data_pointer, 0) == 0: if data.get(data_pointer, 0) == 0:
instruction_pointer = jumps[instruction_pointer] instruction_pointer = jumps[instruction_pointer]
elif command == ']': elif command == "]":
if data.get(data_pointer, 0) != 0: if data.get(data_pointer, 0) != 0:
instruction_pointer = jumps[instruction_pointer] instruction_pointer = jumps[instruction_pointer]
else: # everything else is comment else: # everything else is comment
@ -63,16 +63,19 @@ def brainfuck(program, bits=8):
instruction_pointer += 1 instruction_pointer += 1
if data_pointer != 0: if data_pointer != 0:
print("WARNING (interpreter) - at the end of the execution the data pointer is %s instead of 0 (possibly a compiler issue)" % str(data_pointer)) print(
"WARNING (interpreter) - at the end of the execution the data pointer is %s instead of 0 (possibly a compiler issue)"
% str(data_pointer)
)
if __name__ == '__main__': if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("filepath") parser.add_argument("filepath")
parser.add_argument("--bits", "-b", "--interpreter-bits", type=int, default=8, help="Amount of bits each cell uses") parser.add_argument("--bits", "-b", "--interpreter-bits", type=int, default=8, help="Amount of bits each cell uses")
args = parser.parse_args() args = parser.parse_args()
with open(args.filepath, 'r') as f: with open(args.filepath, "r") as f:
code = f.read() code = f.read()
brainfuck(code, args.bits) brainfuck(code, args.bits)

View file

@ -57,7 +57,7 @@ int main()
$ ./BF-it.py helloworld.code $ ./BF-it.py helloworld.code
Compiling file 'helloworld.code'... Compiling file 'helloworld.code'...
Compiled successfully to 'helloworld.bf' Compiled successfully to 'helloworld.bf'
$ cat helloworld.bf $ cat helloworld.bf
>[-]>[-]<>++++++++[-<+++++++++>]<.>++++[-<+++++++>] >[-]>[-]<>++++++++[-<+++++++++>]<.>++++[-<+++++++>]
<+.+++++++..+++.>++++++[-<------------->]<-.>+++++[ <+.+++++++..+++.>++++++[-<------------->]<-.>+++++[
-<+++++++++++>]<.>++++[-<++++++>]<.+++.------.----- -<+++++++++++>]<.>++++[-<++++++>]<.+++.------.-----
@ -98,4 +98,3 @@ If you found a bug, or have an idea for a feature, open an issue
* https://introcs.cs.princeton.edu/java/11precedence/ for operator precedence * https://introcs.cs.princeton.edu/java/11precedence/ for operator precedence
* https://logomakr.com/ for creating a logo * https://logomakr.com/ for creating a logo
* https://www.youtube.com/ for setting the mood * https://www.youtube.com/ for setting the mood

View file

@ -7,9 +7,9 @@ Cognition tasks for training reasoning capabilities:
""" """
from .color_cube_rotation import ColorCubeRotationConfig, ColorCubeRotationDataset from .color_cube_rotation import ColorCubeRotationConfig, ColorCubeRotationDataset
from .figlet_fonts import FigletFontConfig, FigletFontDataset
from .number_sequences import NumberSequenceConfig, NumberSequenceDataset from .number_sequences import NumberSequenceConfig, NumberSequenceDataset
from .rubiks_cube import RubiksCubeConfig, RubiksCubeDataset from .rubiks_cube import RubiksCubeConfig, RubiksCubeDataset
from .figlet_fonts import FigletFontConfig, FigletFontDataset
__all__ = [ __all__ = [
"NumberSequenceConfig", "NumberSequenceConfig",
@ -19,5 +19,5 @@ __all__ = [
"RubiksCubeConfig", "RubiksCubeConfig",
"RubiksCubeDataset", "RubiksCubeDataset",
"FigletFontConfig", "FigletFontConfig",
"FigletFontDataset" "FigletFontDataset",
] ]

View file

@ -7,10 +7,10 @@ Game tasks for training reasoning capabilities:
""" """
from .countdown import CountdownConfig, CountdownDataset from .countdown import CountdownConfig, CountdownDataset
from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
from .maze import MazeConfig, MazeDataset from .maze import MazeConfig, MazeDataset
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
from .sudoku import SudokuConfig, SudokuDataset from .sudoku import SudokuConfig, SudokuDataset
from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
__all__ = [ __all__ = [
"CountdownConfig", "CountdownConfig",

View file

@ -1,18 +1,19 @@
from dataclasses import dataclass from dataclasses import dataclass
from random import Random from random import Random
from typing import List, Optional, Tuple, Dict from typing import Dict, List, Optional, Tuple
import cellpylib as cpl import cellpylib as cpl
from ..factory import ProceduralDataset, register_dataset from ..factory import ProceduralDataset, register_dataset
@dataclass @dataclass
class GameOfLifeConfig: class GameOfLifeConfig:
"""Configuration for sudoku puzzle generation""" """Configuration for sudoku puzzle generation"""
grid_size_x: int = 20 grid_size_x: int = 20
grid_size_y: int = 20 grid_size_y: int = 20
filled_cells: int = 100 # actually a max filled_cells: int = 100 # actually a max
simulation_steps: int = 1 simulation_steps: int = 1
seed: Optional[int] = None seed: Optional[int] = None
size: int = 500 size: int = 500
@ -25,11 +26,12 @@ class GameOfLifeConfig:
assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y" assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
class GameOfLifeConfigDataset(ProceduralDataset): class GameOfLifeDataset(ProceduralDataset):
"""Generates Game of Life games with configurable parameters""" """Generates Game of Life games with configurable parameters"""
def __init__(self, config: GameOfLifeConfig): def __init__(self, config: GameOfLifeConfig):
self._prompt_templates = ["What will this Game of Life board look like after {simulation_steps} steps of simulation?\n\n{board}" self._prompt_templates = [
"What will this Game of Life board look like after {simulation_steps} steps of simulation?\n\n{board}"
] ]
super().__init__(config=config, seed=config.seed, size=config.size) super().__init__(config=config, seed=config.seed, size=config.size)
@ -46,7 +48,7 @@ class GameOfLifeConfigDataset(ProceduralDataset):
rng = Random(self.seed + idx) rng = Random(self.seed + idx)
# Make the board # Make the board
board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y) board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
board[:, :, :] = 0 board[:, :, :] = 0
# Add the cells # Add the cells
@ -56,13 +58,17 @@ class GameOfLifeConfigDataset(ProceduralDataset):
board[:, rx, ry] = 1 board[:, rx, ry] = 1
# Simulate the result to get the answer # Simulate the result to get the answer
evolved = cpl.evolve2d(board, timesteps=self.config.simulation_steps + 1, apply_rule=cpl.game_of_life_rule, memoize='recursive') evolved = cpl.evolve2d(
board, timesteps=self.config.simulation_steps + 1, apply_rule=cpl.game_of_life_rule, memoize="recursive"
)
board_str = str(board[0]) board_str = str(board[0])
result_str = str(evolved[-1]) result_str = str(evolved[-1])
return { return {
"question": rng.choice(self._prompt_templates).format(simulation_steps=self.config.simulation_steps, board=board_str), "question": rng.choice(self._prompt_templates).format(
simulation_steps=self.config.simulation_steps, board=board_str
),
"answer": result_str, "answer": result_str,
"metadata": { "metadata": {
"grid_size_x": self.config.grid_size_x, "grid_size_x": self.config.grid_size_x,
@ -87,10 +93,10 @@ class GameOfLifeConfigDataset(ProceduralDataset):
if answer == None: if answer == None:
return 0.0 return 0.0
if answer.replace('\n', '') != entry['answer'].replace('\n', ''): if answer.replace("\n", "") != entry["answer"].replace("\n", ""):
return 0.01 return 0.01
else: else:
return 1.0 # Yay return 1.0 # Yay
register_dataset("game_of_life", GameOfLifeConfigDataset, GameOfLifeConfig) register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)

View file

@ -3,7 +3,7 @@ from .quantum_lock import QuantumLockConfig, QuantumLockDataset
__all__ = [ __all__ = [
"FamilyRelationshipsConfig", "FamilyRelationshipsConfig",
"FamilyRelationshipsDataset", "FamilyRelationshipsDataset",
"QuantumLockConfig", "QuantumLockConfig",
"QuantumLockDataset", "QuantumLockDataset",
] ]

View file

@ -32,7 +32,7 @@ def generate_gallery() -> str:
# Add dataset header with anchor # Add dataset header with anchor
anchor = name.replace("_", "-").lower() anchor = name.replace("_", "-").lower()
content.append(f"### {name} {{{anchor}}}\n") content.append(f"### {name}\n")
# Get dataset class docstring if available # Get dataset class docstring if available
if dataset.__class__.__doc__: if dataset.__class__.__doc__:

View file

@ -2,6 +2,7 @@ import pytest
from reasoning_gym.code.bf import BFConfig, BFDataset from reasoning_gym.code.bf import BFConfig, BFDataset
def test_bf(): def test_bf():
"""Test basic properties and solution of generated items""" """Test basic properties and solution of generated items"""
@ -34,4 +35,4 @@ def test_bf():
config = BFConfig(seed=44, size=20, difficulty=3) config = BFConfig(seed=44, size=20, difficulty=3)
dataset = BFDataset(config) dataset = BFDataset(config)
for item in dataset: for item in dataset:
assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0 assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0

View file

@ -1,20 +1,14 @@
import pytest import pytest
from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeConfigDataset from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeDataset
def test_game_of_life(): def test_game_of_life():
"""Test basic properties and solution of generated items""" """Test basic properties and solution of generated items"""
# Easy # Easy
config = GameOfLifeConfig( config = GameOfLifeConfig(seed=42, size=1, grid_size_x=20, grid_size_y=20, filled_cells=10, simulation_steps=1)
seed=42, dataset = GameOfLifeDataset(config)
size=1,
grid_size_x=20,
grid_size_y=20,
filled_cells=10,
simulation_steps=1
)
dataset = GameOfLifeConfigDataset(config)
for item in dataset: for item in dataset:
assert isinstance(item, dict) assert isinstance(item, dict)