mirror of https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00

add contrib

This commit is contained in:
parent bf053e2266
commit 0688cadf59
20 changed files with 3971 additions and 0 deletions
129  reasoning_gym/code/contrib/bfit/Compiler/Parser.py  (Normal file)

@@ -0,0 +1,129 @@
from .Exceptions import BFSyntaxError, BFSemanticError
from .Token import Token
from .General import is_token_literal


class Parser:
    """
    Used to easily iterate tokens
    """
    def __init__(self, tokens):
        self.tokens = tokens
        self.current_token_index = 0

    # parsing tokens
    def current_token(self):
        if self.current_token_index >= len(self.tokens):
            return None
        else:
            return self.token_at_index(self.current_token_index)

    def advance_token(self, amount=1):
        self.current_token_index += amount

    def advance_to_token_at_index(self, token_index):
        self.current_token_index = token_index

    def token_at_index(self, index):
        assert index < len(self.tokens)
        return self.tokens[index]

    def next_token(self, next_amount=1):
        return self.token_at_index(self.current_token_index + next_amount)

    def find_matching(self, starting_index=None):
        """
        :return: the index of the token that matches the current token
        :param starting_index: (optional) the index of the token we want to match

        for example, if current token is {
        it returns the index of the matching }
        """
        if starting_index is None:
            starting_index = self.current_token_index

        tokens = self.tokens
        token_to_match = tokens[starting_index]
        if token_to_match.type == Token.LBRACE:
            inc = Token.LBRACE
            dec = Token.RBRACE
        elif token_to_match.type == Token.LBRACK:
            inc = Token.LBRACK
            dec = Token.RBRACK
        elif token_to_match.type == Token.LPAREN:
            inc = Token.LPAREN
            dec = Token.RPAREN
        else:
            raise BFSemanticError("No support for matching %s" % str(token_to_match))

        i = starting_index
        cnt = 0
        while i < len(tokens):
            if tokens[i].type == inc:
                cnt += 1
            elif tokens[i].type == dec:
                cnt -= 1

            if cnt == 0:
                return i

            i += 1

        raise BFSyntaxError("Did not find matching %s for %s" % (dec, str(token_to_match)))

    def check_next_tokens_are(self, tokens_list, starting_index=None):
        if starting_index is None:
            starting_index = self.current_token_index

        # used for "assertion" and print a nice message to the user
        if starting_index + len(tokens_list) >= len(self.tokens):
            raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
        for i in range(0, len(tokens_list)):
            if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
                raise BFSyntaxError("Expected %s after %s" % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index: starting_index+1+i]]))

    def check_next_token_is(self, token, starting_index=None):
        self.check_next_tokens_are([token], starting_index=starting_index)

    def check_current_tokens_are(self, tokens_list):
        self.check_next_tokens_are(tokens_list, starting_index=self.current_token_index - 1)

    def check_current_token_is(self, token):
        self.check_current_tokens_are([token])

    def compile_array_initialization_list(self):
        # {1, 2, 3, ...} or {array_initialization_list, array_initialization_list, array_initialization_list, ...} or string
        # parses the definition and returns a list (of list of list ....) of literal tokens (NUM, CHAR, TRUE, FALSE)

        list_tokens = []

        if self.current_token().type == Token.STRING:
            string_token = self.current_token()
            line, column = string_token.line, string_token.column
            for char in string_token.data:
                list_tokens.append(Token(Token.NUM, line, column, str(ord(char))))

            self.advance_token()  # point to after STRING
            return list_tokens

        assert self.current_token().type == Token.LBRACE
        self.advance_token()  # skip to after LBRACE

        while is_token_literal(self.current_token()) or self.current_token().type == Token.LBRACE:
            if self.current_token().type == Token.LBRACE:  # list of (literals | list)
                list_tokens.append(self.compile_array_initialization_list())
            else:  # literal
                list_tokens.append(self.current_token())
                self.advance_token()  # skip literal

            if self.current_token().type not in [Token.COMMA, Token.RBRACE]:
                raise BFSyntaxError("Unexpected %s (expected comma (,) or RBRACE (}))" % self.current_token())

            if self.current_token().type == Token.COMMA:
                self.advance_token()  # skip comma
            if self.current_token().type == Token.RBRACE:
                break

        self.check_current_token_is(Token.RBRACE)
        self.advance_token()  # skip RBRACE
        return list_tokens
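
For context, here is a minimal, self-contained sketch of the bracket-matching idea used by Parser.find_matching above. FakeToken is a hypothetical stand-in for the real Compiler.Token.Token class (assumed only for illustration); the counting scheme mirrors the method: +1 on each opening token, -1 on each closing token, and the match is the index where the count returns to zero.

# Illustrative sketch only: FakeToken is a hypothetical stand-in for the real Token class.
class FakeToken:
    LPAREN, RPAREN, NUM = "LPAREN", "RPAREN", "NUM"

    def __init__(self, type_, data=""):
        self.type = type_
        self.data = data


def find_matching(tokens, starting_index=0):
    # Same counting scheme as Parser.find_matching: +1 on the opening
    # token type, -1 on the closing one; the answer is the index where
    # the running count drops back to zero.
    inc, dec = FakeToken.LPAREN, FakeToken.RPAREN
    cnt = 0
    for i in range(starting_index, len(tokens)):
        if tokens[i].type == inc:
            cnt += 1
        elif tokens[i].type == dec:
            cnt -= 1
        if cnt == 0:
            return i
    raise ValueError("No matching %s" % dec)


# ( ( 1 ) )  ->  the "(" at index 0 matches the ")" at index 4
tokens = [FakeToken(FakeToken.LPAREN), FakeToken(FakeToken.LPAREN),
          FakeToken(FakeToken.NUM, "1"), FakeToken(FakeToken.RPAREN),
          FakeToken(FakeToken.RPAREN)]
print(find_matching(tokens))  # prints 4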