mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-23 16:55:05 +00:00
lint
This commit is contained in:
parent
21c47db6c1
commit
ebb88e6c6a
24 changed files with 1215 additions and 814 deletions
1143
GALLERY.md
1143
GALLERY.md
File diff suppressed because it is too large
Load diff
|
|
@ -7,7 +7,4 @@ Cognition tasks for training reasoning capabilities:
|
||||||
|
|
||||||
from .bf import BFConfig, BFDataset
|
from .bf import BFConfig, BFDataset
|
||||||
|
|
||||||
__all__ = [
|
__all__ = ["BFConfig", "BFDataset"]
|
||||||
"BFConfig",
|
|
||||||
"BFDataset"
|
|
||||||
]
|
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,10 @@ from random import Random
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional
|
||||||
|
|
||||||
import bfi
|
import bfi
|
||||||
from .contrib.bfit.Compiler import Compiler, Minify
|
|
||||||
|
|
||||||
from ..data.wordle_words import wordle_words
|
from ..data.wordle_words import wordle_words
|
||||||
from ..factory import ProceduralDataset, register_dataset
|
from ..factory import ProceduralDataset, register_dataset
|
||||||
|
from .contrib.bfit.Compiler import Compiler, Minify
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -122,10 +122,11 @@ int main() {{
|
||||||
|
|
||||||
if answer == None:
|
if answer == None:
|
||||||
return 0.0
|
return 0.0
|
||||||
if answer != entry['answer']:
|
if answer != entry["answer"]:
|
||||||
return 0.01
|
return 0.01
|
||||||
else:
|
else:
|
||||||
return 1.0 # Yay
|
return 1.0 # Yay
|
||||||
|
|
||||||
|
|
||||||
# Register the dataset
|
# Register the dataset
|
||||||
register_dataset("bf", BFDataset, BFConfig)
|
register_dataset("bf", BFDataset, BFConfig)
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,9 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import Interpreter
|
import Interpreter
|
||||||
from Compiler import Compiler
|
from Compiler import Compiler, Minify
|
||||||
from Compiler import Minify
|
|
||||||
|
|
||||||
|
|
||||||
def process_args():
|
def process_args():
|
||||||
|
|
@ -54,5 +54,5 @@ def compile_file():
|
||||||
Interpreter.brainfuck(brainfuck_code)
|
Interpreter.brainfuck(brainfuck_code)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
compile_file()
|
compile_file()
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,18 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from .Exceptions import BFSyntaxError, BFSemanticError
|
from .Exceptions import BFSemanticError, BFSyntaxError
|
||||||
from .FunctionCompiler import FunctionCompiler
|
from .FunctionCompiler import FunctionCompiler
|
||||||
from .Functions import check_function_exists, get_function_object, insert_function_object
|
from .Functions import check_function_exists, get_function_object, insert_function_object
|
||||||
from .General import is_token_literal, get_literal_token_code, unpack_literal_tokens_to_array_dimensions
|
from .General import get_literal_token_code, is_token_literal, unpack_literal_tokens_to_array_dimensions
|
||||||
from .Globals import get_global_variables_size, get_variable_size, get_variable_dimensions, insert_global_variable, create_variable_from_definition
|
from .Globals import (
|
||||||
|
create_variable_from_definition,
|
||||||
|
get_global_variables_size,
|
||||||
|
get_variable_dimensions,
|
||||||
|
get_variable_size,
|
||||||
|
insert_global_variable,
|
||||||
|
)
|
||||||
from .Lexical_analyzer import analyze
|
from .Lexical_analyzer import analyze
|
||||||
from .Optimizer import optimize
|
|
||||||
from .LibraryFunctionCompiler import insert_library_functions
|
from .LibraryFunctionCompiler import insert_library_functions
|
||||||
|
from .Optimizer import optimize
|
||||||
from .Parser import Parser
|
from .Parser import Parser
|
||||||
from .Token import Token
|
from .Token import Token
|
||||||
|
|
||||||
|
|
@ -29,20 +35,24 @@ class Compiler:
|
||||||
# returns function named tuple
|
# returns function named tuple
|
||||||
|
|
||||||
if self.parser.current_token().type not in [Token.VOID, Token.INT]:
|
if self.parser.current_token().type not in [Token.VOID, Token.INT]:
|
||||||
raise BFSemanticError("Function return type can be either void or int, not '%s'" % str(self.parser.current_token()))
|
raise BFSemanticError(
|
||||||
|
"Function return type can be either void or int, not '%s'" % str(self.parser.current_token())
|
||||||
|
)
|
||||||
|
|
||||||
self.parser.check_next_tokens_are([Token.ID, Token.LPAREN])
|
self.parser.check_next_tokens_are([Token.ID, Token.LPAREN])
|
||||||
|
|
||||||
# save all tokens of this function
|
# save all tokens of this function
|
||||||
function_name = self.parser.next_token(next_amount=1).data
|
function_name = self.parser.next_token(next_amount=1).data
|
||||||
RPAREN_index = self.parser.find_matching(starting_index=self.parser.current_token_index+2) # first find RPAREN
|
RPAREN_index = self.parser.find_matching(
|
||||||
|
starting_index=self.parser.current_token_index + 2
|
||||||
|
) # first find RPAREN
|
||||||
self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index)
|
self.parser.check_next_token_is(Token.LBRACE, starting_index=RPAREN_index)
|
||||||
RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index+1) # then find RBRACE
|
RBRACE_index = self.parser.find_matching(starting_index=RPAREN_index + 1) # then find RBRACE
|
||||||
|
|
||||||
# take all tokens between INT and RBRACE and pass them to function object
|
# take all tokens between INT and RBRACE and pass them to function object
|
||||||
function_tokens = self.parser.tokens[self.parser.current_token_index:RBRACE_index+1]
|
function_tokens = self.parser.tokens[self.parser.current_token_index : RBRACE_index + 1]
|
||||||
# skip function definition
|
# skip function definition
|
||||||
self.parser.advance_to_token_at_index(RBRACE_index+1)
|
self.parser.advance_to_token_at_index(RBRACE_index + 1)
|
||||||
|
|
||||||
function = FunctionCompiler(function_name, function_tokens)
|
function = FunctionCompiler(function_name, function_tokens)
|
||||||
return function
|
return function
|
||||||
|
|
@ -60,12 +70,12 @@ class Compiler:
|
||||||
# if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code)
|
# if this is set to True, then the compiler zeros each cell before using it (may generate a lot of unnecessary BF code)
|
||||||
ZERO_CELLS_BEFORE_USE = False
|
ZERO_CELLS_BEFORE_USE = False
|
||||||
|
|
||||||
code = '[-]' if ZERO_CELLS_BEFORE_USE else ''
|
code = "[-]" if ZERO_CELLS_BEFORE_USE else ""
|
||||||
if get_variable_size(variable) > 1: # its an array
|
if get_variable_size(variable) > 1: # its an array
|
||||||
if self.parser.current_token().type == Token.SEMICOLON:
|
if self.parser.current_token().type == Token.SEMICOLON:
|
||||||
# array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON
|
# array definition - INT ID (LBRACK NUM RBRACK)+ SEMICOLON
|
||||||
self.parser.advance_token() # skip SEMICOLON
|
self.parser.advance_token() # skip SEMICOLON
|
||||||
code = (code + '>') * get_variable_size(variable) # advance to after this variable
|
code = (code + ">") * get_variable_size(variable) # advance to after this variable
|
||||||
return code
|
return code
|
||||||
elif self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=":
|
elif self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=":
|
||||||
# array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON
|
# array definition and initialization - INT ID (LBRACK NUM RBRACK)+ ASSIGN ((LBRACE ... RBRACE)+|STRING) SEMICOLON
|
||||||
|
|
@ -79,25 +89,34 @@ class Compiler:
|
||||||
self.parser.advance_token() # skip SEMICOLON
|
self.parser.advance_token() # skip SEMICOLON
|
||||||
|
|
||||||
array_dimensions = get_variable_dimensions(variable)
|
array_dimensions = get_variable_dimensions(variable)
|
||||||
unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(ID_token, array_dimensions, literal_tokens_list)
|
unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
|
||||||
|
ID_token, array_dimensions, literal_tokens_list
|
||||||
|
)
|
||||||
|
|
||||||
for literal in unpacked_literals_list:
|
for literal in unpacked_literals_list:
|
||||||
code += get_literal_token_code(literal) # evaluate this literal and point to next array element
|
code += get_literal_token_code(literal) # evaluate this literal and point to next array element
|
||||||
return code
|
return code
|
||||||
else:
|
else:
|
||||||
raise BFSyntaxError("Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)" % self.parser.current_token())
|
raise BFSyntaxError(
|
||||||
|
"Unexpected %s in array definition. Expected SEMICOLON (;) or ASSIGN (=)"
|
||||||
|
% self.parser.current_token()
|
||||||
|
)
|
||||||
|
|
||||||
elif self.parser.current_token().type == Token.SEMICOLON: # no need to initialize
|
elif self.parser.current_token().type == Token.SEMICOLON: # no need to initialize
|
||||||
self.parser.advance_token() # skip SEMICOLON
|
self.parser.advance_token() # skip SEMICOLON
|
||||||
code += '>' # advance to after this variable
|
code += ">" # advance to after this variable
|
||||||
else:
|
else:
|
||||||
self.parser.check_current_token_is(Token.ASSIGN)
|
self.parser.check_current_token_is(Token.ASSIGN)
|
||||||
if self.parser.current_token().data != "=":
|
if self.parser.current_token().data != "=":
|
||||||
raise BFSyntaxError("Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token())
|
raise BFSyntaxError(
|
||||||
|
"Unexpected %s when initializing global variable. Expected ASSIGN (=)" % self.parser.current_token()
|
||||||
|
)
|
||||||
self.parser.advance_token() # skip ASSIGN
|
self.parser.advance_token() # skip ASSIGN
|
||||||
|
|
||||||
if not is_token_literal(self.parser.current_token()):
|
if not is_token_literal(self.parser.current_token()):
|
||||||
raise BFSemanticError("Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token()))
|
raise BFSemanticError(
|
||||||
|
"Unexpected '%s'. expected literal (NUM | CHAR | TRUE | FALSE )" % str(self.parser.current_token())
|
||||||
|
)
|
||||||
|
|
||||||
code += get_literal_token_code(self.parser.current_token())
|
code += get_literal_token_code(self.parser.current_token())
|
||||||
|
|
||||||
|
|
@ -113,7 +132,7 @@ class Compiler:
|
||||||
When encountering global variable definition - create Variable object
|
When encountering global variable definition - create Variable object
|
||||||
Returns code that initializes global variables and advances the pointer to after them
|
Returns code that initializes global variables and advances the pointer to after them
|
||||||
"""
|
"""
|
||||||
code = ''
|
code = ""
|
||||||
token = self.parser.current_token()
|
token = self.parser.current_token()
|
||||||
while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]:
|
while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]:
|
||||||
if token.type == Token.SEMICOLON: # can have random semicolons ;)
|
if token.type == Token.SEMICOLON: # can have random semicolons ;)
|
||||||
|
|
@ -125,22 +144,31 @@ class Compiler:
|
||||||
if self.parser.next_token(next_amount=2).type == Token.LPAREN:
|
if self.parser.next_token(next_amount=2).type == Token.LPAREN:
|
||||||
function = self.create_function_object()
|
function = self.create_function_object()
|
||||||
insert_function_object(function)
|
insert_function_object(function)
|
||||||
elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [Token.SEMICOLON, Token.ASSIGN, Token.LBRACK]:
|
elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [
|
||||||
|
Token.SEMICOLON,
|
||||||
|
Token.ASSIGN,
|
||||||
|
Token.LBRACK,
|
||||||
|
]:
|
||||||
code += self.compile_global_variable_definition()
|
code += self.compile_global_variable_definition()
|
||||||
else:
|
else:
|
||||||
raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)" % (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token())))
|
raise BFSyntaxError(
|
||||||
|
"Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)"
|
||||||
|
% (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token()))
|
||||||
|
)
|
||||||
|
|
||||||
token = self.parser.current_token()
|
token = self.parser.current_token()
|
||||||
|
|
||||||
if self.parser.current_token() is not None: # we have not reached the last token
|
if self.parser.current_token() is not None: # we have not reached the last token
|
||||||
untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index:]]
|
untouched_tokens = [str(t) for t in self.parser.tokens[self.parser.current_token_index :]]
|
||||||
raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens)
|
raise BFSyntaxError("Did not reach the end of the code. Untouched tokens:\n%s" % untouched_tokens)
|
||||||
|
|
||||||
return code
|
return code
|
||||||
|
|
||||||
def compile(self):
|
def compile(self):
|
||||||
insert_library_functions()
|
insert_library_functions()
|
||||||
code = self.process_global_definitions() # code that initializes global variables and advances pointer to after them
|
code = (
|
||||||
|
self.process_global_definitions()
|
||||||
|
) # code that initializes global variables and advances pointer to after them
|
||||||
|
|
||||||
check_function_exists(Token(Token.ID, 0, 0, "main"), 0)
|
check_function_exists(Token(Token.ID, 0, 0, "main"), 0)
|
||||||
code += get_function_object("main").get_code(get_global_variables_size())
|
code += get_function_object("main").get_code(get_global_variables_size())
|
||||||
|
|
@ -159,7 +187,7 @@ def compile(code, optimize_code=False):
|
||||||
return brainfuck_code
|
return brainfuck_code
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
print("This file cannot be directly run")
|
print("This file cannot be directly run")
|
||||||
print("Please import it and use the 'compile' function")
|
print("Please import it and use the 'compile' function")
|
||||||
print("Which receives a C-like code (string) and returns Brainfuck code (string)")
|
print("Which receives a C-like code (string) and returns Brainfuck code (string)")
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,28 @@
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
from .Exceptions import BFSyntaxError, BFSemanticError
|
|
||||||
|
from .Exceptions import BFSemanticError, BFSyntaxError
|
||||||
from .Functions import check_function_exists, get_function_object
|
from .Functions import check_function_exists, get_function_object
|
||||||
from .General import get_variable_dimensions_from_token, get_move_to_return_value_cell_code, get_print_string_code, get_variable_from_ID_token
|
from .General import (
|
||||||
from .General import get_literal_token_value, process_switch_cases, is_token_literal
|
get_literal_token_value,
|
||||||
|
get_move_to_return_value_cell_code,
|
||||||
|
get_print_string_code,
|
||||||
|
get_variable_dimensions_from_token,
|
||||||
|
get_variable_from_ID_token,
|
||||||
|
is_token_literal,
|
||||||
|
process_switch_cases,
|
||||||
|
)
|
||||||
from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array
|
from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array
|
||||||
from .Node import NodeToken, NodeTernary, NodeArraySetElement, NodeUnaryPrefix, NodeUnaryPostfix, NodeArrayGetElement, NodeFunctionCall, NodeArrayAssignment
|
from .Node import (
|
||||||
|
NodeArrayAssignment,
|
||||||
|
NodeArrayGetElement,
|
||||||
|
NodeArraySetElement,
|
||||||
|
NodeFunctionCall,
|
||||||
|
NodeTernary,
|
||||||
|
NodeToken,
|
||||||
|
NodeUnaryPostfix,
|
||||||
|
NodeUnaryPrefix,
|
||||||
|
)
|
||||||
from .Parser import Parser
|
from .Parser import Parser
|
||||||
from .Token import Token
|
from .Token import Token
|
||||||
|
|
||||||
|
|
@ -83,7 +100,9 @@ class FunctionCompiler:
|
||||||
# new stack pointer should be at least that size
|
# new stack pointer should be at least that size
|
||||||
assert self.current_stack_pointer() <= current_stack_pointer
|
assert self.current_stack_pointer() <= current_stack_pointer
|
||||||
self.return_value_cell = current_stack_pointer
|
self.return_value_cell = current_stack_pointer
|
||||||
self.set_stack_pointer(current_stack_pointer+1) # make room for return_value cell. next available cell is the next one after it.
|
self.set_stack_pointer(
|
||||||
|
current_stack_pointer + 1
|
||||||
|
) # make room for return_value cell. next available cell is the next one after it.
|
||||||
function_code = self.compile_function_scope(self.parameters)
|
function_code = self.compile_function_scope(self.parameters)
|
||||||
self.remove_ids_map() # Global variables
|
self.remove_ids_map() # Global variables
|
||||||
return function_code
|
return function_code
|
||||||
|
|
@ -123,8 +142,12 @@ class FunctionCompiler:
|
||||||
|
|
||||||
# multiply by next dimensions sizes
|
# multiply by next dimensions sizes
|
||||||
multiply_amount = reduce(lambda x, y: x * y, dimensions[1:]) # size of the following dimensions
|
multiply_amount = reduce(lambda x, y: x * y, dimensions[1:]) # size of the following dimensions
|
||||||
node_token_multiply_amount = NodeToken(self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)))
|
node_token_multiply_amount = NodeToken(
|
||||||
index_expression = NodeToken(self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount)
|
self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount))
|
||||||
|
)
|
||||||
|
index_expression = NodeToken(
|
||||||
|
self.ids_map_list, token=multiply_token, left=first_index_expression, right=node_token_multiply_amount
|
||||||
|
)
|
||||||
|
|
||||||
# handle next dimensions
|
# handle next dimensions
|
||||||
dimension = 1
|
dimension = 1
|
||||||
|
|
@ -132,8 +155,10 @@ class FunctionCompiler:
|
||||||
if self.parser.current_token().type != Token.LBRACK: # too few indexes given...
|
if self.parser.current_token().type != Token.LBRACK: # too few indexes given...
|
||||||
if dimension == 1:
|
if dimension == 1:
|
||||||
return first_index_expression # allow use of only one dimension for multi-dimensional array
|
return first_index_expression # allow use of only one dimension for multi-dimensional array
|
||||||
raise BFSemanticError("%s is a %s-dimensional array, but only %s dimension(s) given as index" %
|
raise BFSemanticError(
|
||||||
(str(ID_token), len(dimensions), dimension))
|
"%s is a %s-dimensional array, but only %s dimension(s) given as index"
|
||||||
|
% (str(ID_token), len(dimensions), dimension)
|
||||||
|
)
|
||||||
self.parser.check_current_token_is(Token.LBRACK)
|
self.parser.check_current_token_is(Token.LBRACK)
|
||||||
self.parser.advance_token() # skip LBRACK
|
self.parser.advance_token() # skip LBRACK
|
||||||
exp = self.expression()
|
exp = self.expression()
|
||||||
|
|
@ -143,19 +168,30 @@ class FunctionCompiler:
|
||||||
|
|
||||||
# current_dimension_index *= size_of_following_dimensions
|
# current_dimension_index *= size_of_following_dimensions
|
||||||
if dimension + 1 < len(dimensions): # not last dimension - need to multiply and add
|
if dimension + 1 < len(dimensions): # not last dimension - need to multiply and add
|
||||||
multiply_amount = reduce(lambda x, y: x * y, dimensions[dimension + 1:]) # size of the following dimensions
|
multiply_amount = reduce(
|
||||||
node_token_multiply_amount = NodeToken(self.ids_map_list, token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)))
|
lambda x, y: x * y, dimensions[dimension + 1 :]
|
||||||
multiply_node = NodeToken(self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount)
|
) # size of the following dimensions
|
||||||
|
node_token_multiply_amount = NodeToken(
|
||||||
|
self.ids_map_list,
|
||||||
|
token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(multiply_amount)),
|
||||||
|
)
|
||||||
|
multiply_node = NodeToken(
|
||||||
|
self.ids_map_list, token=multiply_token, left=exp, right=node_token_multiply_amount
|
||||||
|
)
|
||||||
|
|
||||||
# prev_dimensions_index += current_dimension_index
|
# prev_dimensions_index += current_dimension_index
|
||||||
index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=multiply_node)
|
index_expression = NodeToken(
|
||||||
|
self.ids_map_list, token=add_token, left=index_expression, right=multiply_node
|
||||||
|
)
|
||||||
else: # last dimension - no need to multiply, just add
|
else: # last dimension - no need to multiply, just add
|
||||||
index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp)
|
index_expression = NodeToken(self.ids_map_list, token=add_token, left=index_expression, right=exp)
|
||||||
dimension += 1
|
dimension += 1
|
||||||
|
|
||||||
if self.parser.current_token().type == Token.LBRACK: # too many indexes given...
|
if self.parser.current_token().type == Token.LBRACK: # too many indexes given...
|
||||||
raise BFSemanticError("%s is a %s-dimensional array. Unexpected %s" %
|
raise BFSemanticError(
|
||||||
(str(ID_token), len(dimensions), self.parser.current_token()))
|
"%s is a %s-dimensional array. Unexpected %s"
|
||||||
|
% (str(ID_token), len(dimensions), self.parser.current_token())
|
||||||
|
)
|
||||||
return index_expression
|
return index_expression
|
||||||
|
|
||||||
def get_token_after_array_access(self, offset=0):
|
def get_token_after_array_access(self, offset=0):
|
||||||
|
|
@ -193,12 +229,18 @@ class FunctionCompiler:
|
||||||
|
|
||||||
if self.parser.next_token().type == Token.SEMICOLON: # INT ID SEMICOLON
|
if self.parser.next_token().type == Token.SEMICOLON: # INT ID SEMICOLON
|
||||||
self.parser.advance_token(2) # skip ID SEMICOLON
|
self.parser.advance_token(2) # skip ID SEMICOLON
|
||||||
return '' # no code is generated here. code was generated for defining this variable when we entered the scope
|
return (
|
||||||
|
"" # no code is generated here. code was generated for defining this variable when we entered the scope
|
||||||
|
)
|
||||||
|
|
||||||
elif self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "=": # INT ID = EXPRESSION SEMICOLON
|
elif (
|
||||||
|
self.parser.next_token().type == Token.ASSIGN and self.parser.next_token().data == "="
|
||||||
|
): # INT ID = EXPRESSION SEMICOLON
|
||||||
return self.compile_expression_as_statement() # compile_expression_as_statement skips the SEMICOLON
|
return self.compile_expression_as_statement() # compile_expression_as_statement skips the SEMICOLON
|
||||||
|
|
||||||
elif self.parser.next_token().type == Token.LBRACK: # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
|
elif (
|
||||||
|
self.parser.next_token().type == Token.LBRACK
|
||||||
|
): # INT ID (LBRACK NUM RBRACK)+ (= ARRAY_INITIALIZATION)? SEMICOLON
|
||||||
# array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};)
|
# array definition (int arr[2][3]...[];) or array definition and initialization (arr[2][3]...[] = {...};)
|
||||||
token_id = self.parser.current_token()
|
token_id = self.parser.current_token()
|
||||||
self.parser.advance_token() # skip ID
|
self.parser.advance_token() # skip ID
|
||||||
|
|
@ -210,7 +252,7 @@ class FunctionCompiler:
|
||||||
initialization_node = self.compile_array_assignment(token_id)
|
initialization_node = self.compile_array_assignment(token_id)
|
||||||
code = initialization_node.get_code(self.current_stack_pointer()) + "<" # discard expression value
|
code = initialization_node.get_code(self.current_stack_pointer()) + "<" # discard expression value
|
||||||
else:
|
else:
|
||||||
code = '' # just array definition
|
code = "" # just array definition
|
||||||
# no code is generated here. code was generated for defining this variable when we entered the scope
|
# no code is generated here. code was generated for defining this variable when we entered the scope
|
||||||
self.parser.check_current_token_is(Token.SEMICOLON)
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
||||||
self.parser.advance_token() # skip SEMICOLON
|
self.parser.advance_token() # skip SEMICOLON
|
||||||
|
|
@ -297,7 +339,9 @@ class FunctionCompiler:
|
||||||
token = self.tokens[i]
|
token = self.tokens[i]
|
||||||
|
|
||||||
if token.type == Token.INT:
|
if token.type == Token.INT:
|
||||||
if self.tokens[i-2].type != Token.FOR: # if it is not a definition inside a FOR statement (for (int i = 0...))
|
if (
|
||||||
|
self.tokens[i - 2].type != Token.FOR
|
||||||
|
): # if it is not a definition inside a FOR statement (for (int i = 0...))
|
||||||
variable = create_variable_from_definition(self.parser, index=i)
|
variable = create_variable_from_definition(self.parser, index=i)
|
||||||
self.insert_to_ids_map(variable)
|
self.insert_to_ids_map(variable)
|
||||||
|
|
||||||
|
|
@ -333,7 +377,7 @@ class FunctionCompiler:
|
||||||
for parameter in parameters:
|
for parameter in parameters:
|
||||||
self.insert_to_ids_map(parameter)
|
self.insert_to_ids_map(parameter)
|
||||||
|
|
||||||
code = '>' # skip return_value_cell
|
code = ">" # skip return_value_cell
|
||||||
code += self.insert_scope_variables_into_ids_map()
|
code += self.insert_scope_variables_into_ids_map()
|
||||||
# this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables
|
# this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables
|
||||||
|
|
||||||
|
|
@ -377,7 +421,9 @@ class FunctionCompiler:
|
||||||
if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN:
|
if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN:
|
||||||
return self.function_call()
|
return self.function_call()
|
||||||
|
|
||||||
if token.type == Token.ID and self.parser.next_token().type == Token.LBRACK: # array - ID(LBRACK expression RBRACK)+
|
if (
|
||||||
|
token.type == Token.ID and self.parser.next_token().type == Token.LBRACK
|
||||||
|
): # array - ID(LBRACK expression RBRACK)+
|
||||||
index_expression = self.get_array_index_expression()
|
index_expression = self.get_array_index_expression()
|
||||||
return NodeArrayGetElement(self.ids_map_list, token, index_expression)
|
return NodeArrayGetElement(self.ids_map_list, token, index_expression)
|
||||||
|
|
||||||
|
|
@ -386,7 +432,10 @@ class FunctionCompiler:
|
||||||
return NodeToken(self.ids_map_list, token=token)
|
return NodeToken(self.ids_map_list, token=token)
|
||||||
|
|
||||||
if token.type != Token.LPAREN:
|
if token.type != Token.LPAREN:
|
||||||
raise BFSyntaxError("Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))" % str(token))
|
raise BFSyntaxError(
|
||||||
|
"Unexpected '%s'. expected literal (NUM | ID | ID(LBRACK expression RBRACK)+ | TRUE | FALSE | function_call | ( expression ))"
|
||||||
|
% str(token)
|
||||||
|
)
|
||||||
|
|
||||||
# ( expression )
|
# ( expression )
|
||||||
self.parser.check_current_token_is(Token.LPAREN)
|
self.parser.check_current_token_is(Token.LPAREN)
|
||||||
|
|
@ -417,7 +466,9 @@ class FunctionCompiler:
|
||||||
|
|
||||||
if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
|
if token.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
|
||||||
if token.type == Token.BINOP and token.data not in ["+", "-"]:
|
if token.type == Token.BINOP and token.data not in ["+", "-"]:
|
||||||
raise BFSyntaxError("Expected either + or - as unary prefix instead of token %s" % self.parser.current_token())
|
raise BFSyntaxError(
|
||||||
|
"Expected either + or - as unary prefix instead of token %s" % self.parser.current_token()
|
||||||
|
)
|
||||||
self.parser.advance_token()
|
self.parser.advance_token()
|
||||||
unary_prefix = self.unary_prefix()
|
unary_prefix = self.unary_prefix()
|
||||||
|
|
||||||
|
|
@ -618,11 +669,19 @@ class FunctionCompiler:
|
||||||
|
|
||||||
expression_node = self.expression()
|
expression_node = self.expression()
|
||||||
|
|
||||||
new_node = NodeToken(self.ids_map_list, left=NodeToken(self.ids_map_list, token=id_token), token=assign_token, right=expression_node)
|
new_node = NodeToken(
|
||||||
|
self.ids_map_list,
|
||||||
|
left=NodeToken(self.ids_map_list, token=id_token),
|
||||||
|
token=assign_token,
|
||||||
|
right=expression_node,
|
||||||
|
)
|
||||||
return new_node
|
return new_node
|
||||||
|
|
||||||
elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.LBRACK and \
|
elif (
|
||||||
self.get_token_after_array_access().type == Token.ASSIGN:
|
self.parser.current_token().type == Token.ID
|
||||||
|
and self.parser.next_token().type == Token.LBRACK
|
||||||
|
and self.get_token_after_array_access().type == Token.ASSIGN
|
||||||
|
):
|
||||||
# ID (LBRACK expression RBRACK)+ ASSIGN value_expression
|
# ID (LBRACK expression RBRACK)+ ASSIGN value_expression
|
||||||
id_token = self.parser.current_token()
|
id_token = self.parser.current_token()
|
||||||
index_expression = self.get_array_index_expression()
|
index_expression = self.get_array_index_expression()
|
||||||
|
|
@ -744,7 +803,7 @@ class FunctionCompiler:
|
||||||
if self.parser.current_token().type == Token.SEMICOLON:
|
if self.parser.current_token().type == Token.SEMICOLON:
|
||||||
# return;
|
# return;
|
||||||
self.parser.advance_token() # skip ;
|
self.parser.advance_token() # skip ;
|
||||||
return '' # nothing to do
|
return "" # nothing to do
|
||||||
|
|
||||||
# return exp;
|
# return exp;
|
||||||
expression_code = self.compile_expression()
|
expression_code = self.compile_expression()
|
||||||
|
|
@ -763,7 +822,12 @@ class FunctionCompiler:
|
||||||
# this expression can be used as a statement.
|
# this expression can be used as a statement.
|
||||||
# e.g: x+=5; or x++ or ++x;
|
# e.g: x+=5; or x++ or ++x;
|
||||||
|
|
||||||
assert self.parser.current_token().type in [Token.ID, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]
|
assert self.parser.current_token().type in [
|
||||||
|
Token.ID,
|
||||||
|
Token.INCREMENT,
|
||||||
|
Token.DECREMENT,
|
||||||
|
Token.UNARY_MULTIPLICATIVE,
|
||||||
|
]
|
||||||
|
|
||||||
code = self.compile_expression()
|
code = self.compile_expression()
|
||||||
self.parser.check_current_token_is(Token.SEMICOLON)
|
self.parser.check_current_token_is(Token.SEMICOLON)
|
||||||
|
|
@ -901,7 +965,10 @@ class FunctionCompiler:
|
||||||
self.increase_stack_pointer() # use 1 additional temp cell for indicating we need to execute a case
|
self.increase_stack_pointer() # use 1 additional temp cell for indicating we need to execute a case
|
||||||
cases = list() # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool))
|
cases = list() # list of tuples: (value/"default" (int or string), case_code (string), has_break(bool))
|
||||||
|
|
||||||
while self.parser.current_token().type in [Token.CASE, Token.DEFAULT]: # (default | CASE literal) COLON statement* break;? statements*
|
while self.parser.current_token().type in [
|
||||||
|
Token.CASE,
|
||||||
|
Token.DEFAULT,
|
||||||
|
]: # (default | CASE literal) COLON statement* break;? statements*
|
||||||
if self.parser.current_token().type == Token.CASE:
|
if self.parser.current_token().type == Token.CASE:
|
||||||
self.parser.advance_token() # skip CASE
|
self.parser.advance_token() # skip CASE
|
||||||
constant_value_token = self.parser.current_token()
|
constant_value_token = self.parser.current_token()
|
||||||
|
|
@ -922,7 +989,9 @@ class FunctionCompiler:
|
||||||
|
|
||||||
inner_case_code = ""
|
inner_case_code = ""
|
||||||
while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]:
|
while self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE, Token.BREAK]:
|
||||||
inner_case_code += self.compile_statement(allow_declaration=False) # not allowed to declare variables directly inside case
|
inner_case_code += self.compile_statement(
|
||||||
|
allow_declaration=False
|
||||||
|
) # not allowed to declare variables directly inside case
|
||||||
|
|
||||||
has_break = False
|
has_break = False
|
||||||
if self.parser.current_token().type == Token.BREAK: # ignore all statements after break
|
if self.parser.current_token().type == Token.BREAK: # ignore all statements after break
|
||||||
|
|
@ -934,7 +1003,9 @@ class FunctionCompiler:
|
||||||
cases.append((value, inner_case_code, has_break))
|
cases.append((value, inner_case_code, has_break))
|
||||||
|
|
||||||
if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
|
if self.parser.current_token().type not in [Token.CASE, Token.DEFAULT, Token.RBRACE]:
|
||||||
raise BFSyntaxError("Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token())
|
raise BFSyntaxError(
|
||||||
|
"Expected case / default / RBRACE (}) instead of token %s" % self.parser.current_token()
|
||||||
|
)
|
||||||
self.parser.check_current_token_is(Token.RBRACE)
|
self.parser.check_current_token_is(Token.RBRACE)
|
||||||
self.parser.advance_token()
|
self.parser.advance_token()
|
||||||
self.decrease_stack_pointer(amount=2)
|
self.decrease_stack_pointer(amount=2)
|
||||||
|
|
@ -943,7 +1014,10 @@ class FunctionCompiler:
|
||||||
|
|
||||||
def compile_break(self):
|
def compile_break(self):
|
||||||
# TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while
|
# TODO: Make the break statement in scopes inside switch-case (including if/else), and for/do/while
|
||||||
raise NotImplementedError("Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s" % self.parser.current_token())
|
raise NotImplementedError(
|
||||||
|
"Break statement found outside of switch case first scope.\nBreak is not currently implemented for while/for/do statements.\nToken is %s"
|
||||||
|
% self.parser.current_token()
|
||||||
|
)
|
||||||
|
|
||||||
def compile_for(self):
|
def compile_for(self):
|
||||||
# for (statement expression; expression) inner_scope_code note: statement contains ;, and inner_scope_code can be scope { }
|
# for (statement expression; expression) inner_scope_code note: statement contains ;, and inner_scope_code can be scope { }
|
||||||
|
|
@ -951,17 +1025,17 @@ class FunctionCompiler:
|
||||||
# (the statement cannot contain scope - { and } )
|
# (the statement cannot contain scope - { and } )
|
||||||
|
|
||||||
"""
|
"""
|
||||||
<for> is a special case of scope
|
<for> is a special case of scope
|
||||||
the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
|
the initial code (int i = 0;) is executed INSIDE the scope, but BEFORE the LBRACE
|
||||||
so we manually compile the scope instead of using self.compile_scope():
|
so we manually compile the scope instead of using self.compile_scope():
|
||||||
|
|
||||||
we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
|
we first create an ids map, and in the case that there is a variable definition inside the <for> definition:
|
||||||
we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
|
we manually insert the ID into the ids map, and move the pointer to the right once, to make room for it
|
||||||
(this needs to be done before the <for> definition's statement)
|
(this needs to be done before the <for> definition's statement)
|
||||||
next, inside the for's scope {}:
|
next, inside the for's scope {}:
|
||||||
after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
|
after calling insert_scope_variables_into_ids_map, we move the pointer to the left once, since it counts the ID we entered manually as well
|
||||||
after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
|
after calling exit_scope, we move the pointer to the right, since it counts the ID we entered manually, and we don't want it to be discarded after every iteration
|
||||||
finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
|
finally, at the end of the <for> loop, we move the pointer once to the left, to discard the variable we defined manually
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN])
|
self.parser.check_current_tokens_are([Token.FOR, Token.LPAREN])
|
||||||
|
|
@ -969,7 +1043,7 @@ class FunctionCompiler:
|
||||||
|
|
||||||
manually_inserted_variable_in_for_definition = False
|
manually_inserted_variable_in_for_definition = False
|
||||||
variable = None
|
variable = None
|
||||||
code = ''
|
code = ""
|
||||||
|
|
||||||
# =============== enter FOR scope ===============
|
# =============== enter FOR scope ===============
|
||||||
self.add_ids_map()
|
self.add_ids_map()
|
||||||
|
|
@ -987,7 +1061,10 @@ class FunctionCompiler:
|
||||||
show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN
|
show_side_effect_warning = self.get_token_after_array_access(offset=1).type != Token.ASSIGN
|
||||||
|
|
||||||
if show_side_effect_warning:
|
if show_side_effect_warning:
|
||||||
print("[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects" % self.parser.next_token())
|
print(
|
||||||
|
"[Warning] For loop variable '%s' isn't assigned to anything and may cause side effects"
|
||||||
|
% self.parser.next_token()
|
||||||
|
)
|
||||||
|
|
||||||
if self.parser.current_token().type == Token.LBRACE: # statement is a scope
|
if self.parser.current_token().type == Token.LBRACE: # statement is a scope
|
||||||
raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token())
|
raise BFSyntaxError("Unexpected scope inside for loop statement - %s" % self.parser.current_token())
|
||||||
|
|
@ -1042,20 +1119,31 @@ class FunctionCompiler:
|
||||||
token = self.parser.current_token()
|
token = self.parser.current_token()
|
||||||
if token.type == Token.INT: # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON
|
if token.type == Token.INT: # INT ID ((= EXPRESSION) | ([NUM])+ (= ARRAY_INITIALIZATION)?)? SEMICOLON
|
||||||
if not allow_declaration:
|
if not allow_declaration:
|
||||||
raise BFSemanticError("Cannot define variable (%s) directly inside case. "
|
raise BFSemanticError(
|
||||||
"Can define inside new scope {} or outside the switch statement" % token)
|
"Cannot define variable (%s) directly inside case. "
|
||||||
|
"Can define inside new scope {} or outside the switch statement" % token
|
||||||
|
)
|
||||||
return self.compile_variable_declaration()
|
return self.compile_variable_declaration()
|
||||||
|
|
||||||
elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: # ++ID;
|
elif token.type in [Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: # ++ID;
|
||||||
return self.compile_expression_as_statement()
|
return self.compile_expression_as_statement()
|
||||||
|
|
||||||
elif token.type == Token.ID:
|
elif token.type == Token.ID:
|
||||||
if self.parser.next_token().type in [Token.ASSIGN, Token.LBRACK, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]:
|
if self.parser.next_token().type in [
|
||||||
|
Token.ASSIGN,
|
||||||
|
Token.LBRACK,
|
||||||
|
Token.INCREMENT,
|
||||||
|
Token.DECREMENT,
|
||||||
|
Token.UNARY_MULTIPLICATIVE,
|
||||||
|
]:
|
||||||
# ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++;
|
# ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++;
|
||||||
return self.compile_expression_as_statement()
|
return self.compile_expression_as_statement()
|
||||||
elif self.parser.next_token().type == Token.LPAREN: # ID(...); (function call)
|
elif self.parser.next_token().type == Token.LPAREN: # ID(...); (function call)
|
||||||
return self.compile_function_call_statement()
|
return self.compile_function_call_statement()
|
||||||
raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)" % (str(self.parser.next_token()), str(token)))
|
raise BFSyntaxError(
|
||||||
|
"Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)"
|
||||||
|
% (str(self.parser.next_token()), str(token))
|
||||||
|
)
|
||||||
|
|
||||||
elif token.type == Token.PRINT:
|
elif token.type == Token.PRINT:
|
||||||
return self.compile_print_string()
|
return self.compile_print_string()
|
||||||
|
|
@ -1097,7 +1185,7 @@ class FunctionCompiler:
|
||||||
def compile_scope_statements(self):
|
def compile_scope_statements(self):
|
||||||
tokens = self.tokens
|
tokens = self.tokens
|
||||||
|
|
||||||
code = ''
|
code = ""
|
||||||
while self.parser.current_token() is not None:
|
while self.parser.current_token() is not None:
|
||||||
if self.parser.current_token().type == Token.RBRACE:
|
if self.parser.current_token().type == Token.RBRACE:
|
||||||
# we reached the end of our scope
|
# we reached the end of our scope
|
||||||
|
|
@ -1124,29 +1212,29 @@ class FunctionCompiler:
|
||||||
# will be inserted into the new scope prior to the scope's compilation
|
# will be inserted into the new scope prior to the scope's compilation
|
||||||
|
|
||||||
"""
|
"""
|
||||||
example layout:
|
example layout:
|
||||||
int global_var1;
|
int global_var1;
|
||||||
int global_var2;
|
int global_var2;
|
||||||
int foo(int a, int b) {
|
int foo(int a, int b) {
|
||||||
int x;
|
int x;
|
||||||
int y;
|
int y;
|
||||||
return 5;
|
return 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
int n;
|
int n;
|
||||||
foo(1, 2);
|
foo(1, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
|
global_var1 global_var2 main_return_value n foo_return_value a=1 b=2 x y
|
||||||
|
|
||||||
calling convention:
|
calling convention:
|
||||||
caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell
|
caller responsibility: make room for return_value (and zero its cell), place parameters, point to return_value cell
|
||||||
callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters)
|
callee responsibility: put return value in return_value cell and point to it (thus "cleaning" parameters)
|
||||||
can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1)
|
can assume that there is a zeroed cell at current_stack_pointer (return_value_cell) (therefore ids_map starts at index current_stack_pointer+1)
|
||||||
can assume that the next cells match your parameters
|
can assume that the next cells match your parameters
|
||||||
assumes that initially, the pointer points to the first cell (return_value_cell).
|
assumes that initially, the pointer points to the first cell (return_value_cell).
|
||||||
therefore begin with '>' * (1 + parameters + scope variables)
|
therefore begin with '>' * (1 + parameters + scope variables)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
assert self.parser.current_token().type == Token.LBRACE
|
assert self.parser.current_token().type == Token.LBRACE
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
from .Exceptions import BFSemanticError
|
from .Exceptions import BFSemanticError
|
||||||
|
|
||||||
functions = dict() # Global dictionary of function_name --> FunctionCompiler objects
|
functions = dict() # Global dictionary of function_name --> FunctionCompiler objects
|
||||||
|
|
@ -30,4 +31,7 @@ def check_function_exists(function_token, parameters_amount):
|
||||||
|
|
||||||
function = functions[function_name]
|
function = functions[function_name]
|
||||||
if len(function.parameters) != parameters_amount:
|
if len(function.parameters) != parameters_amount:
|
||||||
raise BFSemanticError("Function '%s' has %s parameters (called it with %s parameters)" % (str(function_token), len(function.parameters), parameters_amount))
|
raise BFSemanticError(
|
||||||
|
"Function '%s' has %s parameters (called it with %s parameters)"
|
||||||
|
% (str(function_token), len(function.parameters), parameters_amount)
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
from .Exceptions import BFSyntaxError, BFSemanticError
|
|
||||||
from .Token import Token
|
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
||||||
|
from .Exceptions import BFSemanticError, BFSyntaxError
|
||||||
|
from .Token import Token
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This file holds functions that generate general Brainfuck code
|
This file holds functions that generate general Brainfuck code
|
||||||
And general functions that are not dependent on other objects
|
And general functions that are not dependent on other objects
|
||||||
|
|
@ -126,23 +127,29 @@ def unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_d
|
||||||
if len(array_dimensions) == 0:
|
if len(array_dimensions) == 0:
|
||||||
raise BFSemanticError("Tried to initialize array %s with too many nested sub-arrays" % ID_token)
|
raise BFSemanticError("Tried to initialize array %s with too many nested sub-arrays" % ID_token)
|
||||||
if len(literal_tokens_list) > array_dimensions[0]:
|
if len(literal_tokens_list) > array_dimensions[0]:
|
||||||
raise BFSemanticError("Tried to initialize array %s dimension %s with too many elements (%s)"
|
raise BFSemanticError(
|
||||||
% (ID_token, str(array_dimensions), str(len(literal_tokens_list))))
|
"Tried to initialize array %s dimension %s with too many elements (%s)"
|
||||||
|
% (ID_token, str(array_dimensions), str(len(literal_tokens_list)))
|
||||||
|
)
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for element in literal_tokens_list:
|
for element in literal_tokens_list:
|
||||||
if isinstance(element, list):
|
if isinstance(element, list):
|
||||||
# recursively unpack the list with the sub-dimension of the sub-array
|
# recursively unpack the list with the sub-dimension of the sub-array
|
||||||
# E.g if we have arr[3][3][3] and then this call will fill [3][3]=9 elements
|
# E.g if we have arr[3][3][3] and then this call will fill [3][3]=9 elements
|
||||||
result.extend(unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions[1:], element))
|
result.extend(
|
||||||
|
unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions[1:], element)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
result.append(element)
|
result.append(element)
|
||||||
if len(array_dimensions) > 1:
|
if len(array_dimensions) > 1:
|
||||||
dimension_size = dimensions_to_size(array_dimensions[1:]) # current size we need to fill
|
dimension_size = dimensions_to_size(array_dimensions[1:]) # current size we need to fill
|
||||||
result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size - 1)) # fill missing elements in this dimension with zeros
|
result.extend(
|
||||||
|
[Token(Token.NUM, 0, 0, "0")] * (dimension_size - 1)
|
||||||
|
) # fill missing elements in this dimension with zeros
|
||||||
|
|
||||||
dimension_size = dimensions_to_size(array_dimensions) # current size we need to fill
|
dimension_size = dimensions_to_size(array_dimensions) # current size we need to fill
|
||||||
result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size-len(result))) # fill the result with zeros
|
result.extend([Token(Token.NUM, 0, 0, "0")] * (dimension_size - len(result))) # fill the result with zeros
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -157,13 +164,20 @@ def unpack_literal_tokens_to_array_dimensions(ID_token, array_dimensions, litera
|
||||||
if all(not isinstance(element, list) for element in literal_tokens_list):
|
if all(not isinstance(element, list) for element in literal_tokens_list):
|
||||||
# special case - if all elements are literals, then we allow assigning them as-is and not care about dimensions
|
# special case - if all elements are literals, then we allow assigning them as-is and not care about dimensions
|
||||||
# E.g if we have arr[3][3][3] = {1,2,3,4} then return [1,2,3,4,0,0,0,0,0]
|
# E.g if we have arr[3][3][3] = {1,2,3,4} then return [1,2,3,4,0,0,0,0,0]
|
||||||
unpacked_literals_list = literal_tokens_list + [Token(Token.NUM, 0, 0, "0")] * (array_size - len(literal_tokens_list)) # fill missing with zeros
|
unpacked_literals_list = literal_tokens_list + [Token(Token.NUM, 0, 0, "0")] * (
|
||||||
|
array_size - len(literal_tokens_list)
|
||||||
|
) # fill missing with zeros
|
||||||
else:
|
else:
|
||||||
unpacked_literals_list = unpack_multidimensional_literal_tokens_to_array_dimensions(ID_token, array_dimensions, literal_tokens_list)
|
unpacked_literals_list = unpack_multidimensional_literal_tokens_to_array_dimensions(
|
||||||
|
ID_token, array_dimensions, literal_tokens_list
|
||||||
|
)
|
||||||
|
|
||||||
if len(unpacked_literals_list) > array_size:
|
if len(unpacked_literals_list) > array_size:
|
||||||
raise BFSemanticError("Tried to initialize array %s with incompatible amount of literals."
|
raise BFSemanticError(
|
||||||
" (array size is %s and literals size is %s)" % (ID_token, str(array_size), str(len(unpacked_literals_list))))
|
"Tried to initialize array %s with incompatible amount of literals."
|
||||||
|
" (array size is %s and literals size is %s)"
|
||||||
|
% (ID_token, str(array_size), str(len(unpacked_literals_list)))
|
||||||
|
)
|
||||||
assert len(unpacked_literals_list) == array_size
|
assert len(unpacked_literals_list) == array_size
|
||||||
return unpacked_literals_list
|
return unpacked_literals_list
|
||||||
|
|
||||||
|
|
@ -208,7 +222,9 @@ def process_switch_cases(expression_code, cases):
|
||||||
code += "<" # point to expression
|
code += "<" # point to expression
|
||||||
|
|
||||||
if all_cases_have_break: # small optimization for evaluating the expression
|
if all_cases_have_break: # small optimization for evaluating the expression
|
||||||
cases = [case for case in cases if case[0] != "default"] # remove default to be able to sort. it is handled differently
|
cases = [
|
||||||
|
case for case in cases if case[0] != "default"
|
||||||
|
] # remove default to be able to sort. it is handled differently
|
||||||
cases.sort(key=lambda x: x[0], reverse=True) # Can sort since correct flow is not needed
|
cases.sort(key=lambda x: x[0], reverse=True) # Can sort since correct flow is not needed
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
@ -487,22 +503,22 @@ def get_bitwise_code(code_logic):
|
||||||
code += "<<" # point to a
|
code += "<<" # point to a
|
||||||
|
|
||||||
code += "[" # while a != 0:
|
code += "[" # while a != 0:
|
||||||
code += "-" # a -= 1
|
code += "-" # a -= 1
|
||||||
code += ">>-" # c -= 1
|
code += ">>-" # c -= 1
|
||||||
code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w)
|
code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w)
|
||||||
code += ">>" # point to y
|
code += ">>" # point to y
|
||||||
code += ">>+<<" # bit1 += 1
|
code += ">>+<<" # bit1 += 1
|
||||||
|
|
||||||
code += "-[" # if y != 1:
|
code += "-[" # if y != 1:
|
||||||
code += "<+" # x += 1
|
code += "<+" # x += 1
|
||||||
code += "<<++" # c += 2 (c was 0)
|
code += "<<++" # c += 2 (c was 0)
|
||||||
code += ">" * 5 # point to bit1
|
code += ">" * 5 # point to bit1
|
||||||
code += "--" # bit1 -= 2 (bit1 was 2)
|
code += "--" # bit1 -= 2 (bit1 was 2)
|
||||||
code += "<<" # point to y
|
code += "<<" # point to y
|
||||||
code += "+" # set y to 0
|
code += "+" # set y to 0
|
||||||
code += "]" # end if
|
code += "]" # end if
|
||||||
|
|
||||||
code += "<<<<<" # point to a
|
code += "<<<<<" # point to a
|
||||||
code += "]" # end while
|
code += "]" # end while
|
||||||
|
|
||||||
code += ">>>>[<<<<+>>>>-]" # move x to a (x is a/2)
|
code += ">>>>[<<<<+>>>>-]" # move x to a (x is a/2)
|
||||||
|
|
@ -510,21 +526,21 @@ def get_bitwise_code(code_logic):
|
||||||
code += "<" # point to b
|
code += "<" # point to b
|
||||||
|
|
||||||
code += "[" # while b != 0:
|
code += "[" # while b != 0:
|
||||||
code += "-" # b -= 1
|
code += "-" # b -= 1
|
||||||
code += ">-" # c -= 1
|
code += ">-" # c -= 1
|
||||||
code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w)
|
code += "[>+>>+<<<-]>[<+>-]" # copy c to y (using w)
|
||||||
code += ">>" # point to y
|
code += ">>" # point to y
|
||||||
code += ">+<" # z += 1
|
code += ">+<" # z += 1
|
||||||
|
|
||||||
code += "-[" # if y != 1:
|
code += "-[" # if y != 1:
|
||||||
code += ">--<" # z -= 2 (z was 2)
|
code += ">--<" # z -= 2 (z was 2)
|
||||||
code += "<+" # x += 1
|
code += "<+" # x += 1
|
||||||
code += "<<++" # c += 2 (c was 0)
|
code += "<<++" # c += 2 (c was 0)
|
||||||
code += ">>>" # point to y
|
code += ">>>" # point to y
|
||||||
code += "+" # set y to 0
|
code += "+" # set y to 0
|
||||||
code += "]"
|
code += "]"
|
||||||
|
|
||||||
code += "<<<<" # point to b
|
code += "<<<<" # point to b
|
||||||
code += "]" # end while
|
code += "]" # end while
|
||||||
|
|
||||||
# w is a % 2
|
# w is a % 2
|
||||||
|
|
@ -658,14 +674,14 @@ def get_unary_prefix_op_code(token, offset_to_variable=None):
|
||||||
assert token.data in ["+", "-"]
|
assert token.data in ["+", "-"]
|
||||||
if token.data == "+":
|
if token.data == "+":
|
||||||
# keep value as-is
|
# keep value as-is
|
||||||
return '>'
|
return ">"
|
||||||
elif token.data == "-":
|
elif token.data == "-":
|
||||||
# a temp
|
# a temp
|
||||||
code = ">[-]" # zero temp
|
code = ">[-]" # zero temp
|
||||||
code += "<" # point to a
|
code += "<" # point to a
|
||||||
code += "[->-<]" # sub a from temp
|
code += "[->-<]" # sub a from temp
|
||||||
code += ">" # point to temp
|
code += ">" # point to temp
|
||||||
code += "[<+>-]" # copy temp to a
|
code += "[<+>-]" # copy temp to a
|
||||||
return code
|
return code
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
@ -1127,7 +1143,6 @@ def get_op_boolean_operator_code(node, current_pointer):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_print_string_code(string):
|
def get_print_string_code(string):
|
||||||
code = "[-]" # zero the current cell
|
code = "[-]" # zero the current cell
|
||||||
code += ">[-]" # zero the next cell (will be used for loop counts)
|
code += ">[-]" # zero the next cell (will be used for loop counts)
|
||||||
|
|
@ -1200,6 +1215,7 @@ def get_move_left_index_cell_code():
|
||||||
# General
|
# General
|
||||||
# =================
|
# =================
|
||||||
|
|
||||||
|
|
||||||
def get_literal_token_value(token):
|
def get_literal_token_value(token):
|
||||||
# known at compilation time
|
# known at compilation time
|
||||||
assert is_token_literal(token)
|
assert is_token_literal(token)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from .Token import Token
|
|
||||||
from .General import dimensions_to_size, get_NUM_token_value
|
from .General import dimensions_to_size, get_NUM_token_value
|
||||||
|
from .Token import Token
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This file holds the program's functions and global variables
|
This file holds the program's functions and global variables
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import re
|
import re
|
||||||
from .Token import Token
|
|
||||||
from .Optimizer import optimize
|
from .Optimizer import optimize
|
||||||
|
from .Token import Token
|
||||||
|
|
||||||
|
|
||||||
class LexicalErrorException(Exception):
|
class LexicalErrorException(Exception):
|
||||||
|
|
@ -14,64 +15,59 @@ def analyze(text):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
rules = [
|
rules = [
|
||||||
('\s+', Token.WHITESPACE),
|
(r"\s+", Token.WHITESPACE),
|
||||||
('void', Token.VOID),
|
("void", Token.VOID),
|
||||||
('int', Token.INT),
|
("int", Token.INT),
|
||||||
('bool', Token.INT), # treat bool as int
|
("bool", Token.INT), # treat bool as int
|
||||||
('char', Token.INT), # treat char as int
|
("char", Token.INT), # treat char as int
|
||||||
|
("true", Token.TRUE),
|
||||||
('true', Token.TRUE),
|
("false", Token.FALSE),
|
||||||
('false', Token.FALSE),
|
("&&", Token.AND),
|
||||||
('&&', Token.AND),
|
(r"\|\|", Token.OR),
|
||||||
('\|\|', Token.OR),
|
(r"\!", Token.NOT),
|
||||||
('\!', Token.NOT),
|
("return", Token.RETURN),
|
||||||
('return', Token.RETURN),
|
("if", Token.IF),
|
||||||
('if', Token.IF),
|
("else", Token.ELSE),
|
||||||
('else', Token.ELSE),
|
("while", Token.WHILE),
|
||||||
('while', Token.WHILE),
|
("for", Token.FOR),
|
||||||
('for', Token.FOR),
|
("do", Token.DO),
|
||||||
('do', Token.DO),
|
("print", Token.PRINT),
|
||||||
('print', Token.PRINT),
|
("switch", Token.SWITCH),
|
||||||
('switch', Token.SWITCH),
|
("case", Token.CASE),
|
||||||
('case', Token.CASE),
|
("default", Token.DEFAULT),
|
||||||
('default', Token.DEFAULT),
|
("break", Token.BREAK),
|
||||||
('break', Token.BREAK),
|
("continue", Token.CONTINUE), # todo
|
||||||
('continue', Token.CONTINUE), # todo
|
(":", Token.COLON),
|
||||||
(':', Token.COLON),
|
(";", Token.SEMICOLON),
|
||||||
(';', Token.SEMICOLON),
|
(",", Token.COMMA),
|
||||||
(',', Token.COMMA),
|
(r"\(", Token.LPAREN),
|
||||||
|
(r"\)", Token.RPAREN),
|
||||||
('\(', Token.LPAREN),
|
(r"\{", Token.LBRACE),
|
||||||
('\)', Token.RPAREN),
|
(r"\}", Token.RBRACE),
|
||||||
('\{', Token.LBRACE),
|
(r"\[", Token.LBRACK),
|
||||||
('\}', Token.RBRACE),
|
(r"\]", Token.RBRACK),
|
||||||
('\[', Token.LBRACK),
|
(r"=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=", Token.ASSIGN),
|
||||||
('\]', Token.RBRACK),
|
(r"\?", Token.TERNARY),
|
||||||
('=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=', Token.ASSIGN),
|
(r"<=|>=|==|!=|<|>", Token.RELOP),
|
||||||
('\?', Token.TERNARY),
|
(r"\+\+", Token.INCREMENT),
|
||||||
|
("--", Token.DECREMENT),
|
||||||
('<=|>=|==|!=|<|>', Token.RELOP),
|
(r"\+|-|\*|/|%", Token.BINOP),
|
||||||
('\+\+', Token.INCREMENT),
|
(r"\*\*|//|%%", Token.UNARY_MULTIPLICATIVE),
|
||||||
('--', Token.DECREMENT),
|
("<<|>>", Token.BITWISE_SHIFT),
|
||||||
('\+|-|\*|/|%', Token.BINOP),
|
("~", Token.BITWISE_NOT),
|
||||||
('\*\*|//|%%', Token.UNARY_MULTIPLICATIVE),
|
("&", Token.BITWISE_AND),
|
||||||
|
(r"\|", Token.BITWISE_OR),
|
||||||
('<<|>>', Token.BITWISE_SHIFT),
|
(r"\^", Token.BITWISE_XOR),
|
||||||
('~', Token.BITWISE_NOT),
|
("([a-zA-Z_][a-zA-Z0-9_]*)", Token.ID),
|
||||||
('&', Token.BITWISE_AND),
|
(r"(\d+)", Token.NUM),
|
||||||
('\|', Token.BITWISE_OR),
|
(r"(0x[A-Fa-f\d]+)", Token.NUM), # hexadecimal number
|
||||||
('\^', Token.BITWISE_XOR),
|
("(0o[0-7]+)", Token.NUM), # octal number
|
||||||
|
("(0b[01]+)", Token.NUM), # binary number
|
||||||
('([a-zA-Z_][a-zA-Z0-9_]*)', Token.ID),
|
(r'\"(\\\"|[^"])*"', Token.STRING),
|
||||||
('(\d+)', Token.NUM),
|
(r"\'(\\\'|(\\)?[^\'])\'", Token.CHAR),
|
||||||
('(0x[A-Fa-f\d]+)', Token.NUM), # hexadecimal number
|
("//.*(\\n|$)", Token.COMMENT),
|
||||||
('(0o[0-7]+)', Token.NUM), # octal number
|
(r"/\*[\s\S]*?\*/", Token.COMMENT), # multiline comments
|
||||||
('(0b[01]+)', Token.NUM), # binary number
|
(".", Token.UNIDENTIFIED),
|
||||||
(r'\"(\\\"|[^"])*"', Token.STRING),
|
|
||||||
(r'\'(\\\'|(\\)?[^\'])\'', Token.CHAR),
|
|
||||||
('//.*(\\n|$)', Token.COMMENT),
|
|
||||||
(r'/\*[\s\S]*?\*/', Token.COMMENT), # multiline comments
|
|
||||||
('.', Token.UNIDENTIFIED)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
rules = [(re.compile(r), t) for r, t in rules]
|
rules = [(re.compile(r), t) for r, t in rules]
|
||||||
|
|
@ -79,7 +75,7 @@ def analyze(text):
|
||||||
tokens = []
|
tokens = []
|
||||||
|
|
||||||
# create a mapping of [line number] to [offset of that line from the beginning of the text]
|
# create a mapping of [line number] to [offset of that line from the beginning of the text]
|
||||||
newline = re.compile('\n')
|
newline = re.compile("\n")
|
||||||
lines = [0] + [m.end() for m in re.finditer(newline, text)]
|
lines = [0] + [m.end() for m in re.finditer(newline, text)]
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
|
|
@ -99,12 +95,12 @@ def analyze(text):
|
||||||
|
|
||||||
# calculate line and column
|
# calculate line and column
|
||||||
line, column = None, None
|
line, column = None, None
|
||||||
for line_idx in range(len(lines)-1):
|
for line_idx in range(len(lines) - 1):
|
||||||
if lines[line_idx] <= longest_match.start() < lines[line_idx+1]:
|
if lines[line_idx] <= longest_match.start() < lines[line_idx + 1]:
|
||||||
line, column = line_idx+1, (longest_match.start() - lines[line_idx])+1 # humans count from 1 :)
|
line, column = line_idx + 1, (longest_match.start() - lines[line_idx]) + 1 # humans count from 1 :)
|
||||||
break
|
break
|
||||||
if not line:
|
if not line:
|
||||||
line, column = len(lines), (longest_match.start() - lines[-1])+1
|
line, column = len(lines), (longest_match.start() - lines[-1]) + 1
|
||||||
|
|
||||||
if matched_token in [Token.COMMENT, Token.WHITESPACE]:
|
if matched_token in [Token.COMMENT, Token.WHITESPACE]:
|
||||||
pass # do nothing
|
pass # do nothing
|
||||||
|
|
@ -112,8 +108,18 @@ def analyze(text):
|
||||||
raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column))
|
raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column))
|
||||||
elif matched_token in [Token.STRING, Token.CHAR]:
|
elif matched_token in [Token.STRING, Token.CHAR]:
|
||||||
# remove quotes at beginning and end, un-escape characters
|
# remove quotes at beginning and end, un-escape characters
|
||||||
tokens.append(Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape")))
|
tokens.append(
|
||||||
elif matched_token in [Token.NUM, Token.ID, Token.BINOP, Token.RELOP, Token.ASSIGN, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_SHIFT]:
|
Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape"))
|
||||||
|
)
|
||||||
|
elif matched_token in [
|
||||||
|
Token.NUM,
|
||||||
|
Token.ID,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.RELOP,
|
||||||
|
Token.ASSIGN,
|
||||||
|
Token.UNARY_MULTIPLICATIVE,
|
||||||
|
Token.BITWISE_SHIFT,
|
||||||
|
]:
|
||||||
tokens.append(Token(matched_token, line, column, longest_match.group()))
|
tokens.append(Token(matched_token, line, column, longest_match.group()))
|
||||||
else:
|
else:
|
||||||
tokens.append(Token(matched_token, line, column))
|
tokens.append(Token(matched_token, line, column))
|
||||||
|
|
@ -128,16 +134,40 @@ def tests():
|
||||||
text = "my international int ; int; pints; international;"
|
text = "my international int ; int; pints; international;"
|
||||||
res = analyze(text)
|
res = analyze(text)
|
||||||
|
|
||||||
expected = [Token.ID, Token.ID, Token.INT, Token.SEMICOLON, Token.INT, Token.SEMICOLON, Token.ID,
|
expected = [
|
||||||
Token.SEMICOLON, Token.ID, Token.SEMICOLON]
|
Token.ID,
|
||||||
|
Token.ID,
|
||||||
|
Token.INT,
|
||||||
|
Token.SEMICOLON,
|
||||||
|
Token.INT,
|
||||||
|
Token.SEMICOLON,
|
||||||
|
Token.ID,
|
||||||
|
Token.SEMICOLON,
|
||||||
|
Token.ID,
|
||||||
|
Token.SEMICOLON,
|
||||||
|
]
|
||||||
assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
|
assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
|
||||||
|
|
||||||
def test2():
|
def test2():
|
||||||
text = "true !||!false falsek k||y+-a&&x"
|
text = "true !||!false falsek k||y+-a&&x"
|
||||||
res = analyze(text)
|
res = analyze(text)
|
||||||
|
|
||||||
expected = [Token.TRUE, Token.NOT, Token.OR, Token.NOT, Token.FALSE, Token.ID, Token.ID, Token.OR, Token.ID,
|
expected = [
|
||||||
Token.BINOP, Token.BINOP, Token.ID, Token.AND, Token.ID]
|
Token.TRUE,
|
||||||
|
Token.NOT,
|
||||||
|
Token.OR,
|
||||||
|
Token.NOT,
|
||||||
|
Token.FALSE,
|
||||||
|
Token.ID,
|
||||||
|
Token.ID,
|
||||||
|
Token.OR,
|
||||||
|
Token.ID,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.ID,
|
||||||
|
Token.AND,
|
||||||
|
Token.ID,
|
||||||
|
]
|
||||||
assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
|
assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))
|
||||||
|
|
||||||
def test3():
|
def test3():
|
||||||
|
|
@ -166,9 +196,29 @@ def tests():
|
||||||
# test all arithmetic operations
|
# test all arithmetic operations
|
||||||
text = "(1+2*3/6)+(1%3)*(6-1)"
|
text = "(1+2*3/6)+(1%3)*(6-1)"
|
||||||
tokens = analyze(text)
|
tokens = analyze(text)
|
||||||
expected = [Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM,
|
expected = [
|
||||||
Token.RPAREN, Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN,
|
Token.LPAREN,
|
||||||
Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN]
|
Token.NUM,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.NUM,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.NUM,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.NUM,
|
||||||
|
Token.RPAREN,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.LPAREN,
|
||||||
|
Token.NUM,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.NUM,
|
||||||
|
Token.RPAREN,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.LPAREN,
|
||||||
|
Token.NUM,
|
||||||
|
Token.BINOP,
|
||||||
|
Token.NUM,
|
||||||
|
Token.RPAREN,
|
||||||
|
]
|
||||||
assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
|
assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
|
||||||
optimize(tokens)
|
optimize(tokens)
|
||||||
assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5"
|
assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5"
|
||||||
|
|
@ -179,5 +229,5 @@ def tests():
|
||||||
test3()
|
test3()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
tests()
|
tests()
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,9 @@ def get_readint_code():
|
||||||
code += ">" # point to tmp
|
code += ">" # point to tmp
|
||||||
code += "[<++++++++++>-]" # res = tmp * 10, tmp = 0
|
code += "[<++++++++++>-]" # res = tmp * 10, tmp = 0
|
||||||
code += ">" # point to input
|
code += ">" # point to input
|
||||||
code += "-" * (0x30 - 10) # convert character to a digit by subtracting 0x30 from it (we already subtracted 10 before)
|
code += "-" * (
|
||||||
|
0x30 - 10
|
||||||
|
) # convert character to a digit by subtracting 0x30 from it (we already subtracted 10 before)
|
||||||
code += "[<<+>>-]" # res += input
|
code += "[<<+>>-]" # res += input
|
||||||
code += "]" # end if
|
code += "]" # end if
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,20 @@
|
||||||
from .Exceptions import BFSemanticError
|
from .Exceptions import BFSemanticError
|
||||||
from .General import get_copy_from_variable_code, get_copy_to_variable_code
|
from .General import (
|
||||||
from .General import get_move_left_index_cell_code, get_move_right_index_cells_code
|
get_copy_from_variable_code,
|
||||||
from .General import get_offset_to_variable, get_variable_dimensions_from_token
|
get_copy_to_variable_code,
|
||||||
from .General import get_op_between_literals_code, get_literal_token_code, get_token_ID_code
|
get_literal_token_code,
|
||||||
from .General import get_unary_prefix_op_code, get_unary_postfix_op_code, is_token_literal
|
get_move_left_index_cell_code,
|
||||||
from .General import unpack_literal_tokens_to_array_dimensions, get_op_boolean_operator_code
|
get_move_right_index_cells_code,
|
||||||
|
get_offset_to_variable,
|
||||||
|
get_op_between_literals_code,
|
||||||
|
get_op_boolean_operator_code,
|
||||||
|
get_token_ID_code,
|
||||||
|
get_unary_postfix_op_code,
|
||||||
|
get_unary_prefix_op_code,
|
||||||
|
get_variable_dimensions_from_token,
|
||||||
|
is_token_literal,
|
||||||
|
unpack_literal_tokens_to_array_dimensions,
|
||||||
|
)
|
||||||
from .Token import Token
|
from .Token import Token
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
@ -60,7 +70,14 @@ class NodeToken(Node):
|
||||||
else:
|
else:
|
||||||
return get_literal_token_code(self.token)
|
return get_literal_token_code(self.token)
|
||||||
|
|
||||||
elif self.token.type in [Token.BINOP, Token.RELOP, Token.BITWISE_SHIFT, Token.BITWISE_AND, Token.BITWISE_OR, Token.BITWISE_XOR]:
|
elif self.token.type in [
|
||||||
|
Token.BINOP,
|
||||||
|
Token.RELOP,
|
||||||
|
Token.BITWISE_SHIFT,
|
||||||
|
Token.BITWISE_AND,
|
||||||
|
Token.BITWISE_OR,
|
||||||
|
Token.BITWISE_XOR,
|
||||||
|
]:
|
||||||
code = self.left.get_code(current_pointer)
|
code = self.left.get_code(current_pointer)
|
||||||
code += self.right.get_code(current_pointer + 1)
|
code += self.right.get_code(current_pointer + 1)
|
||||||
code += "<<" # point to the first operand
|
code += "<<" # point to the first operand
|
||||||
|
|
@ -78,7 +95,7 @@ class NodeToken(Node):
|
||||||
elif self.token.type == Token.ASSIGN:
|
elif self.token.type == Token.ASSIGN:
|
||||||
assert self.left.token.type == Token.ID
|
assert self.left.token.type == Token.ID
|
||||||
|
|
||||||
if self.token.data == '=':
|
if self.token.data == "=":
|
||||||
# id = expression
|
# id = expression
|
||||||
code = self.right.get_code(current_pointer)
|
code = self.right.get_code(current_pointer)
|
||||||
|
|
||||||
|
|
@ -119,7 +136,7 @@ class NodeTernary(Node):
|
||||||
code = ">" # point to bool_evaluate_node_false
|
code = ">" # point to bool_evaluate_node_false
|
||||||
code += "[-]+" # bool_evaluate_node_false=1
|
code += "[-]+" # bool_evaluate_node_false=1
|
||||||
code += ">" # point to condition
|
code += ">" # point to condition
|
||||||
code += self.condition.get_code(current_pointer+2) # evaluate condition
|
code += self.condition.get_code(current_pointer + 2) # evaluate condition
|
||||||
code += "<" # point to condition
|
code += "<" # point to condition
|
||||||
|
|
||||||
code += "[" # if condition is non-zero
|
code += "[" # if condition is non-zero
|
||||||
|
|
@ -150,7 +167,14 @@ class NodeUnaryPrefix(Node):
|
||||||
|
|
||||||
def get_code(self, current_pointer, *args, **kwargs):
|
def get_code(self, current_pointer, *args, **kwargs):
|
||||||
# unary prefix (!x or ++x or ~x or -x)
|
# unary prefix (!x or ++x or ~x or -x)
|
||||||
assert self.token_operation.type in [Token.NOT, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_NOT, Token.BINOP]
|
assert self.token_operation.type in [
|
||||||
|
Token.NOT,
|
||||||
|
Token.INCREMENT,
|
||||||
|
Token.DECREMENT,
|
||||||
|
Token.UNARY_MULTIPLICATIVE,
|
||||||
|
Token.BITWISE_NOT,
|
||||||
|
Token.BINOP,
|
||||||
|
]
|
||||||
|
|
||||||
if self.token_operation.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
|
if self.token_operation.type in [Token.NOT, Token.BITWISE_NOT, Token.BINOP]:
|
||||||
code = self.node_literal.get_code(current_pointer)
|
code = self.node_literal.get_code(current_pointer)
|
||||||
|
|
@ -178,10 +202,15 @@ class NodeUnaryPrefix(Node):
|
||||||
|
|
||||||
# the token to apply on must be an ID
|
# the token to apply on must be an ID
|
||||||
if isinstance(self.node_literal, NodeToken) is False:
|
if isinstance(self.node_literal, NodeToken) is False:
|
||||||
raise BFSemanticError("Prefix operator %s can only be applied to a variable" % str(self.token_operation))
|
raise BFSemanticError(
|
||||||
|
"Prefix operator %s can only be applied to a variable" % str(self.token_operation)
|
||||||
|
)
|
||||||
|
|
||||||
if self.node_literal.token.type != Token.ID:
|
if self.node_literal.token.type != Token.ID:
|
||||||
raise BFSemanticError("Prefix operator %s cannot be applied to %s, but only to a variable" % (str(self.token_operation), str(self.node_literal.token)))
|
raise BFSemanticError(
|
||||||
|
"Prefix operator %s cannot be applied to %s, but only to a variable"
|
||||||
|
% (str(self.token_operation), str(self.node_literal.token))
|
||||||
|
)
|
||||||
|
|
||||||
offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
|
offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
|
||||||
return get_unary_prefix_op_code(self.token_operation, offset_to_ID)
|
return get_unary_prefix_op_code(self.token_operation, offset_to_ID)
|
||||||
|
|
@ -218,7 +247,10 @@ class NodeUnaryPostfix(Node):
|
||||||
raise BFSemanticError("Postfix operator %s can only be applied to a variable" % str(self.token_operation))
|
raise BFSemanticError("Postfix operator %s can only be applied to a variable" % str(self.token_operation))
|
||||||
|
|
||||||
if self.node_literal.token.type != Token.ID:
|
if self.node_literal.token.type != Token.ID:
|
||||||
raise BFSemanticError("Postfix operator %s cannot be applied to %s, but only to a variable" % (str(self.token_operation), str(self.node_literal.token)))
|
raise BFSemanticError(
|
||||||
|
"Postfix operator %s cannot be applied to %s, but only to a variable"
|
||||||
|
% (str(self.token_operation), str(self.node_literal.token))
|
||||||
|
)
|
||||||
|
|
||||||
offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
|
offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer)
|
||||||
return get_unary_postfix_op_code(self.token_operation, offset_to_ID)
|
return get_unary_postfix_op_code(self.token_operation, offset_to_ID)
|
||||||
|
|
@ -227,27 +259,31 @@ class NodeUnaryPostfix(Node):
|
||||||
class NodeFunctionCall(Node):
|
class NodeFunctionCall(Node):
|
||||||
def __init__(self, ids_map_list, function_to_call, parameters):
|
def __init__(self, ids_map_list, function_to_call, parameters):
|
||||||
"""
|
"""
|
||||||
receives a FunctionCompiler object
|
receives a FunctionCompiler object
|
||||||
that implements get_code() which gets a stack pointer and returns code
|
that implements get_code() which gets a stack pointer and returns code
|
||||||
receives a list of parameters - Node objects
|
receives a list of parameters - Node objects
|
||||||
each one gets a stack pointer and returns code that evaluates the parameter
|
each one gets a stack pointer and returns code that evaluates the parameter
|
||||||
"""
|
"""
|
||||||
Node.__init__(self, ids_map_list)
|
Node.__init__(self, ids_map_list)
|
||||||
self.function_to_call = function_to_call
|
self.function_to_call = function_to_call
|
||||||
self.parameters = parameters
|
self.parameters = parameters
|
||||||
|
|
||||||
def get_code(self, current_pointer, *args, **kwargs):
|
def get_code(self, current_pointer, *args, **kwargs):
|
||||||
code = '[-]>' # return_value_cell=0
|
code = "[-]>" # return_value_cell=0
|
||||||
|
|
||||||
# evaluate parameters from left to right, and put them on the "stack" in that order
|
# evaluate parameters from left to right, and put them on the "stack" in that order
|
||||||
# after each parameter code, the pointer points to the next available cell (one after the parameter)
|
# after each parameter code, the pointer points to the next available cell (one after the parameter)
|
||||||
for i, parameter in enumerate(self.parameters):
|
for i, parameter in enumerate(self.parameters):
|
||||||
code += parameter.get_code(current_pointer+1+i) # evaluate each parameter at its cell offset (starting at one after return_value_cell)
|
code += parameter.get_code(
|
||||||
|
current_pointer + 1 + i
|
||||||
|
) # evaluate each parameter at its cell offset (starting at one after return_value_cell)
|
||||||
|
|
||||||
# at this point we point to one after the last parameter
|
# at this point we point to one after the last parameter
|
||||||
code += "<" * len(self.parameters) # point back to first parameter
|
code += "<" * len(self.parameters) # point back to first parameter
|
||||||
code += "<" # point to return_value_cell
|
code += "<" # point to return_value_cell
|
||||||
code += self.function_to_call.get_code(current_stack_pointer=current_pointer) # after this we point to return value cell
|
code += self.function_to_call.get_code(
|
||||||
|
current_stack_pointer=current_pointer
|
||||||
|
) # after this we point to return value cell
|
||||||
code += ">" # point to next available cell (one after return value)
|
code += ">" # point to next available cell (one after return value)
|
||||||
return code
|
return code
|
||||||
|
|
||||||
|
|
@ -377,9 +413,10 @@ class NodeArraySetElement(NodeArrayElement):
|
||||||
|
|
||||||
class NodeArrayAssignment(Node):
|
class NodeArrayAssignment(Node):
|
||||||
"""
|
"""
|
||||||
Used for array assignment
|
Used for array assignment
|
||||||
E.g arr = = { 1, 2, 3... }
|
E.g arr = = { 1, 2, 3... }
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, ids_map_list, token_id, literal_tokens_list):
|
def __init__(self, ids_map_list, token_id, literal_tokens_list):
|
||||||
Node.__init__(self, ids_map_list)
|
Node.__init__(self, ids_map_list)
|
||||||
self.token_id = token_id
|
self.token_id = token_id
|
||||||
|
|
@ -387,7 +424,9 @@ class NodeArrayAssignment(Node):
|
||||||
|
|
||||||
def get_code(self, current_pointer, *args, **kwargs):
|
def get_code(self, current_pointer, *args, **kwargs):
|
||||||
array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id)
|
array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id)
|
||||||
unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(self.token_id, array_dimensions, self.literal_tokens_list)
|
unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(
|
||||||
|
self.token_id, array_dimensions, self.literal_tokens_list
|
||||||
|
)
|
||||||
|
|
||||||
offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer)
|
offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer)
|
||||||
code = "<" * offset # point to first array element
|
code = "<" * offset # point to first array element
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,13 @@ def optimize_once(tokens):
|
||||||
# optimize arithmetic operations. E.g replace 1+2 with 3
|
# optimize arithmetic operations. E.g replace 1+2 with 3
|
||||||
|
|
||||||
# need to be careful not to optimize (1+2*3) to (3*3)
|
# need to be careful not to optimize (1+2*3) to (3*3)
|
||||||
if tokens[start_index+1].data in ["*", "/", "%"] or (start_index+3 >= len(tokens)) or (tokens[start_index+3].data not in ["*", "/", "%"]):
|
if (
|
||||||
num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index+2])
|
tokens[start_index + 1].data in ["*", "/", "%"]
|
||||||
op = tokens[start_index+1].data
|
or (start_index + 3 >= len(tokens))
|
||||||
|
or (tokens[start_index + 3].data not in ["*", "/", "%"])
|
||||||
|
):
|
||||||
|
num1, num2 = get_NUM_token_value(tokens[start_index]), get_NUM_token_value(tokens[start_index + 2])
|
||||||
|
op = tokens[start_index + 1].data
|
||||||
if op == "+":
|
if op == "+":
|
||||||
val = num1 + num2
|
val = num1 + num2
|
||||||
elif op == "-":
|
elif op == "-":
|
||||||
|
|
@ -38,8 +42,13 @@ def optimize_once(tokens):
|
||||||
raise NotImplementedError(op)
|
raise NotImplementedError(op)
|
||||||
|
|
||||||
# remove the 3 old tokens and replace them with new one
|
# remove the 3 old tokens and replace them with new one
|
||||||
new_token = Token(Token.NUM, tokens[start_index].line, tokens[start_index].column, data=str(val),
|
new_token = Token(
|
||||||
original_tokens=tokens[start_index:start_index+3])
|
Token.NUM,
|
||||||
|
tokens[start_index].line,
|
||||||
|
tokens[start_index].column,
|
||||||
|
data=str(val),
|
||||||
|
original_tokens=tokens[start_index : start_index + 3],
|
||||||
|
)
|
||||||
|
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
tokens.pop(start_index)
|
tokens.pop(start_index)
|
||||||
|
|
@ -52,16 +61,24 @@ def optimize_once(tokens):
|
||||||
# replace printint(50) with print("50")
|
# replace printint(50) with print("50")
|
||||||
# since printing strings compiles into less Brainfuck code than printing ints
|
# since printing strings compiles into less Brainfuck code than printing ints
|
||||||
if tokens[start_index].data == "printint":
|
if tokens[start_index].data == "printint":
|
||||||
tokens[start_index] = Token(Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]])
|
tokens[start_index] = Token(
|
||||||
tokens[start_index+2] = Token(Token.STRING, tokens[start_index].line, tokens[start_index].column,
|
Token.PRINT, tokens[start_index].line, tokens[start_index].column, original_tokens=[tokens[start_index]]
|
||||||
data=str(tokens[start_index+2].data), original_tokens=[tokens[start_index+2]])
|
)
|
||||||
|
tokens[start_index + 2] = Token(
|
||||||
|
Token.STRING,
|
||||||
|
tokens[start_index].line,
|
||||||
|
tokens[start_index].column,
|
||||||
|
data=str(tokens[start_index + 2].data),
|
||||||
|
original_tokens=[tokens[start_index + 2]],
|
||||||
|
)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
rules = [([Token.NUM, Token.BINOP, Token.NUM], optimize_binop), # arithmetic operations
|
rules = [
|
||||||
([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint), # printint(50) to print("50")
|
([Token.NUM, Token.BINOP, Token.NUM], optimize_binop), # arithmetic operations
|
||||||
]
|
([Token.ID, Token.LPAREN, Token.NUM, Token.RPAREN], optimize_printint), # printint(50) to print("50")
|
||||||
|
]
|
||||||
|
|
||||||
# try to match one of the rules to the tokens in a "sliding window" style
|
# try to match one of the rules to the tokens in a "sliding window" style
|
||||||
i = 0
|
i = 0
|
||||||
|
|
@ -69,7 +86,7 @@ def optimize_once(tokens):
|
||||||
optimized = False
|
optimized = False
|
||||||
for tokens_sequence, optimization_function in rules:
|
for tokens_sequence, optimization_function in rules:
|
||||||
if i + len(tokens_sequence) <= len(tokens):
|
if i + len(tokens_sequence) <= len(tokens):
|
||||||
if all(tokens_sequence[n] == tokens[i+n].type for n in range(len(tokens_sequence))):
|
if all(tokens_sequence[n] == tokens[i + n].type for n in range(len(tokens_sequence))):
|
||||||
if optimization_function(tokens, i):
|
if optimization_function(tokens, i):
|
||||||
optimized = True
|
optimized = True
|
||||||
if optimized:
|
if optimized:
|
||||||
|
|
@ -82,7 +99,7 @@ def optimize(tokens):
|
||||||
prev_tokens = [token.type for token in tokens]
|
prev_tokens = [token.type for token in tokens]
|
||||||
while True:
|
while True:
|
||||||
optimize_once(tokens)
|
optimize_once(tokens)
|
||||||
print(".", end='')
|
print(".", end="")
|
||||||
current_tokens = [token.type for token in tokens]
|
current_tokens = [token.type for token in tokens]
|
||||||
if current_tokens == prev_tokens:
|
if current_tokens == prev_tokens:
|
||||||
break
|
break
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
from .Exceptions import BFSyntaxError, BFSemanticError
|
from .Exceptions import BFSemanticError, BFSyntaxError
|
||||||
from .Token import Token
|
|
||||||
from .General import is_token_literal
|
from .General import is_token_literal
|
||||||
|
from .Token import Token
|
||||||
|
|
||||||
|
|
||||||
class Parser:
|
class Parser:
|
||||||
"""
|
"""
|
||||||
Used to easily iterate tokens
|
Used to easily iterate tokens
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tokens):
|
def __init__(self, tokens):
|
||||||
self.tokens = tokens
|
self.tokens = tokens
|
||||||
self.current_token_index = 0
|
self.current_token_index = 0
|
||||||
|
|
@ -80,7 +81,10 @@ class Parser:
|
||||||
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
|
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
|
||||||
for i in range(0, len(tokens_list)):
|
for i in range(0, len(tokens_list)):
|
||||||
if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
|
if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
|
||||||
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index: starting_index+1+i]]))
|
raise BFSyntaxError(
|
||||||
|
"Expected %s after %s"
|
||||||
|
% (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index : starting_index + 1 + i]])
|
||||||
|
)
|
||||||
|
|
||||||
def check_next_token_is(self, token, starting_index=None):
|
def check_next_token_is(self, token, starting_index=None):
|
||||||
self.check_next_tokens_are([token], starting_index=starting_index)
|
self.check_next_tokens_are([token], starting_index=starting_index)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
def create_jumps_dictionary(program):
|
def create_jumps_dictionary(program):
|
||||||
|
|
@ -9,9 +9,9 @@ def create_jumps_dictionary(program):
|
||||||
res = dict()
|
res = dict()
|
||||||
|
|
||||||
for index, command in enumerate(program):
|
for index, command in enumerate(program):
|
||||||
if command == '[':
|
if command == "[":
|
||||||
lbraces.append(index)
|
lbraces.append(index)
|
||||||
elif command == ']':
|
elif command == "]":
|
||||||
if len(lbraces) == 0:
|
if len(lbraces) == 0:
|
||||||
raise SyntaxError("Brainfuck: mismatched parentheses (at index: %s)" % index)
|
raise SyntaxError("Brainfuck: mismatched parentheses (at index: %s)" % index)
|
||||||
|
|
||||||
|
|
@ -35,26 +35,26 @@ def brainfuck(program, bits=8):
|
||||||
while instruction_pointer < len(program):
|
while instruction_pointer < len(program):
|
||||||
command = program[instruction_pointer]
|
command = program[instruction_pointer]
|
||||||
|
|
||||||
if command == '>':
|
if command == ">":
|
||||||
data_pointer += 1
|
data_pointer += 1
|
||||||
elif command == '<':
|
elif command == "<":
|
||||||
data_pointer -= 1
|
data_pointer -= 1
|
||||||
elif command == '+':
|
elif command == "+":
|
||||||
data[data_pointer] = (data.get(data_pointer, 0) + 1)
|
data[data_pointer] = data.get(data_pointer, 0) + 1
|
||||||
if data[data_pointer] == 2 ** bits:
|
if data[data_pointer] == 2**bits:
|
||||||
data[data_pointer] = 0
|
data[data_pointer] = 0
|
||||||
elif command == '-':
|
elif command == "-":
|
||||||
data[data_pointer] = (data.get(data_pointer, 0) - 1)
|
data[data_pointer] = data.get(data_pointer, 0) - 1
|
||||||
if data[data_pointer] == -1:
|
if data[data_pointer] == -1:
|
||||||
data[data_pointer] = 2 ** bits - 1
|
data[data_pointer] = 2**bits - 1
|
||||||
elif command == ',':
|
elif command == ",":
|
||||||
data[data_pointer] = ord(sys.stdin.read(1)) % 256
|
data[data_pointer] = ord(sys.stdin.read(1)) % 256
|
||||||
elif command == '.':
|
elif command == ".":
|
||||||
print(chr(data.get(data_pointer, 0)), end='', flush=True)
|
print(chr(data.get(data_pointer, 0)), end="", flush=True)
|
||||||
elif command == '[':
|
elif command == "[":
|
||||||
if data.get(data_pointer, 0) == 0:
|
if data.get(data_pointer, 0) == 0:
|
||||||
instruction_pointer = jumps[instruction_pointer]
|
instruction_pointer = jumps[instruction_pointer]
|
||||||
elif command == ']':
|
elif command == "]":
|
||||||
if data.get(data_pointer, 0) != 0:
|
if data.get(data_pointer, 0) != 0:
|
||||||
instruction_pointer = jumps[instruction_pointer]
|
instruction_pointer = jumps[instruction_pointer]
|
||||||
else: # everything else is comment
|
else: # everything else is comment
|
||||||
|
|
@ -63,16 +63,19 @@ def brainfuck(program, bits=8):
|
||||||
instruction_pointer += 1
|
instruction_pointer += 1
|
||||||
|
|
||||||
if data_pointer != 0:
|
if data_pointer != 0:
|
||||||
print("WARNING (interpreter) - at the end of the execution the data pointer is %s instead of 0 (possibly a compiler issue)" % str(data_pointer))
|
print(
|
||||||
|
"WARNING (interpreter) - at the end of the execution the data pointer is %s instead of 0 (possibly a compiler issue)"
|
||||||
|
% str(data_pointer)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("filepath")
|
parser.add_argument("filepath")
|
||||||
parser.add_argument("--bits", "-b", "--interpreter-bits", type=int, default=8, help="Amount of bits each cell uses")
|
parser.add_argument("--bits", "-b", "--interpreter-bits", type=int, default=8, help="Amount of bits each cell uses")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
with open(args.filepath, 'r') as f:
|
with open(args.filepath, "r") as f:
|
||||||
code = f.read()
|
code = f.read()
|
||||||
|
|
||||||
brainfuck(code, args.bits)
|
brainfuck(code, args.bits)
|
||||||
|
|
|
||||||
|
|
@ -98,4 +98,3 @@ If you found a bug, or have an idea for a feature, open an issue
|
||||||
* https://introcs.cs.princeton.edu/java/11precedence/ for operator precedence
|
* https://introcs.cs.princeton.edu/java/11precedence/ for operator precedence
|
||||||
* https://logomakr.com/ for creating a logo
|
* https://logomakr.com/ for creating a logo
|
||||||
* https://www.youtube.com/ for setting the mood
|
* https://www.youtube.com/ for setting the mood
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,9 @@ Cognition tasks for training reasoning capabilities:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .color_cube_rotation import ColorCubeRotationConfig, ColorCubeRotationDataset
|
from .color_cube_rotation import ColorCubeRotationConfig, ColorCubeRotationDataset
|
||||||
|
from .figlet_fonts import FigletFontConfig, FigletFontDataset
|
||||||
from .number_sequences import NumberSequenceConfig, NumberSequenceDataset
|
from .number_sequences import NumberSequenceConfig, NumberSequenceDataset
|
||||||
from .rubiks_cube import RubiksCubeConfig, RubiksCubeDataset
|
from .rubiks_cube import RubiksCubeConfig, RubiksCubeDataset
|
||||||
from .figlet_fonts import FigletFontConfig, FigletFontDataset
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"NumberSequenceConfig",
|
"NumberSequenceConfig",
|
||||||
|
|
@ -19,5 +19,5 @@ __all__ = [
|
||||||
"RubiksCubeConfig",
|
"RubiksCubeConfig",
|
||||||
"RubiksCubeDataset",
|
"RubiksCubeDataset",
|
||||||
"FigletFontConfig",
|
"FigletFontConfig",
|
||||||
"FigletFontDataset"
|
"FigletFontDataset",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,10 @@ Game tasks for training reasoning capabilities:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .countdown import CountdownConfig, CountdownDataset
|
from .countdown import CountdownConfig, CountdownDataset
|
||||||
|
from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
|
||||||
from .maze import MazeConfig, MazeDataset
|
from .maze import MazeConfig, MazeDataset
|
||||||
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
|
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
|
||||||
from .sudoku import SudokuConfig, SudokuDataset
|
from .sudoku import SudokuConfig, SudokuDataset
|
||||||
from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"CountdownConfig",
|
"CountdownConfig",
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,19 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from random import Random
|
from random import Random
|
||||||
from typing import List, Optional, Tuple, Dict
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import cellpylib as cpl
|
import cellpylib as cpl
|
||||||
|
|
||||||
from ..factory import ProceduralDataset, register_dataset
|
from ..factory import ProceduralDataset, register_dataset
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class GameOfLifeConfig:
|
class GameOfLifeConfig:
|
||||||
"""Configuration for sudoku puzzle generation"""
|
"""Configuration for sudoku puzzle generation"""
|
||||||
|
|
||||||
grid_size_x: int = 20
|
grid_size_x: int = 20
|
||||||
grid_size_y: int = 20
|
grid_size_y: int = 20
|
||||||
filled_cells: int = 100 # actually a max
|
filled_cells: int = 100 # actually a max
|
||||||
simulation_steps: int = 1
|
simulation_steps: int = 1
|
||||||
seed: Optional[int] = None
|
seed: Optional[int] = None
|
||||||
size: int = 500
|
size: int = 500
|
||||||
|
|
@ -25,11 +26,12 @@ class GameOfLifeConfig:
|
||||||
assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
|
assert self.filled_cells <= self.grid_size_x * self.grid_size_y, "filled_cells must fit in x times y"
|
||||||
|
|
||||||
|
|
||||||
class GameOfLifeConfigDataset(ProceduralDataset):
|
class GameOfLifeDataset(ProceduralDataset):
|
||||||
"""Generates Game of Life games with configurable parameters"""
|
"""Generates Game of Life games with configurable parameters"""
|
||||||
|
|
||||||
def __init__(self, config: GameOfLifeConfig):
|
def __init__(self, config: GameOfLifeConfig):
|
||||||
self._prompt_templates = ["What will this Game of Life board look like after {simulation_steps} steps of simulation?\n\n{board}"
|
self._prompt_templates = [
|
||||||
|
"What will this Game of Life board look like after {simulation_steps} steps of simulation?\n\n{board}"
|
||||||
]
|
]
|
||||||
|
|
||||||
super().__init__(config=config, seed=config.seed, size=config.size)
|
super().__init__(config=config, seed=config.seed, size=config.size)
|
||||||
|
|
@ -46,7 +48,7 @@ class GameOfLifeConfigDataset(ProceduralDataset):
|
||||||
rng = Random(self.seed + idx)
|
rng = Random(self.seed + idx)
|
||||||
|
|
||||||
# Make the board
|
# Make the board
|
||||||
board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
|
board = cpl.init_simple2d(self.config.grid_size_x, self.config.grid_size_y)
|
||||||
board[:, :, :] = 0
|
board[:, :, :] = 0
|
||||||
|
|
||||||
# Add the cells
|
# Add the cells
|
||||||
|
|
@ -56,13 +58,17 @@ class GameOfLifeConfigDataset(ProceduralDataset):
|
||||||
board[:, rx, ry] = 1
|
board[:, rx, ry] = 1
|
||||||
|
|
||||||
# Simulate the result to get the answer
|
# Simulate the result to get the answer
|
||||||
evolved = cpl.evolve2d(board, timesteps=self.config.simulation_steps + 1, apply_rule=cpl.game_of_life_rule, memoize='recursive')
|
evolved = cpl.evolve2d(
|
||||||
|
board, timesteps=self.config.simulation_steps + 1, apply_rule=cpl.game_of_life_rule, memoize="recursive"
|
||||||
|
)
|
||||||
|
|
||||||
board_str = str(board[0])
|
board_str = str(board[0])
|
||||||
result_str = str(evolved[-1])
|
result_str = str(evolved[-1])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"question": rng.choice(self._prompt_templates).format(simulation_steps=self.config.simulation_steps, board=board_str),
|
"question": rng.choice(self._prompt_templates).format(
|
||||||
|
simulation_steps=self.config.simulation_steps, board=board_str
|
||||||
|
),
|
||||||
"answer": result_str,
|
"answer": result_str,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"grid_size_x": self.config.grid_size_x,
|
"grid_size_x": self.config.grid_size_x,
|
||||||
|
|
@ -87,10 +93,10 @@ class GameOfLifeConfigDataset(ProceduralDataset):
|
||||||
|
|
||||||
if answer == None:
|
if answer == None:
|
||||||
return 0.0
|
return 0.0
|
||||||
if answer.replace('\n', '') != entry['answer'].replace('\n', ''):
|
if answer.replace("\n", "") != entry["answer"].replace("\n", ""):
|
||||||
return 0.01
|
return 0.01
|
||||||
else:
|
else:
|
||||||
return 1.0 # Yay
|
return 1.0 # Yay
|
||||||
|
|
||||||
|
|
||||||
register_dataset("game_of_life", GameOfLifeConfigDataset, GameOfLifeConfig)
|
register_dataset("game_of_life", GameOfLifeDataset, GameOfLifeConfig)
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ def generate_gallery() -> str:
|
||||||
|
|
||||||
# Add dataset header with anchor
|
# Add dataset header with anchor
|
||||||
anchor = name.replace("_", "-").lower()
|
anchor = name.replace("_", "-").lower()
|
||||||
content.append(f"### {name} {{{anchor}}}\n")
|
content.append(f"### {name}\n")
|
||||||
|
|
||||||
# Get dataset class docstring if available
|
# Get dataset class docstring if available
|
||||||
if dataset.__class__.__doc__:
|
if dataset.__class__.__doc__:
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import pytest
|
||||||
|
|
||||||
from reasoning_gym.code.bf import BFConfig, BFDataset
|
from reasoning_gym.code.bf import BFConfig, BFDataset
|
||||||
|
|
||||||
|
|
||||||
def test_bf():
|
def test_bf():
|
||||||
"""Test basic properties and solution of generated items"""
|
"""Test basic properties and solution of generated items"""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,14 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeConfigDataset
|
from reasoning_gym.games.game_of_life import GameOfLifeConfig, GameOfLifeDataset
|
||||||
|
|
||||||
|
|
||||||
def test_game_of_life():
|
def test_game_of_life():
|
||||||
"""Test basic properties and solution of generated items"""
|
"""Test basic properties and solution of generated items"""
|
||||||
|
|
||||||
# Easy
|
# Easy
|
||||||
config = GameOfLifeConfig(
|
config = GameOfLifeConfig(seed=42, size=1, grid_size_x=20, grid_size_y=20, filled_cells=10, simulation_steps=1)
|
||||||
seed=42,
|
dataset = GameOfLifeDataset(config)
|
||||||
size=1,
|
|
||||||
grid_size_x=20,
|
|
||||||
grid_size_y=20,
|
|
||||||
filled_cells=10,
|
|
||||||
simulation_steps=1
|
|
||||||
)
|
|
||||||
dataset = GameOfLifeConfigDataset(config)
|
|
||||||
|
|
||||||
for item in dataset:
|
for item in dataset:
|
||||||
assert isinstance(item, dict)
|
assert isinstance(item, dict)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue