mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00
133 lines
4.9 KiB
Python
133 lines
4.9 KiB
Python
from .Exceptions import BFSemanticError, BFSyntaxError
|
|
from .General import is_token_literal
|
|
from .Token import Token
|
|
|
|
|
|
class Parser:
    """
    Used to easily iterate tokens

    Holds a token list together with a cursor (``current_token_index``) and
    offers look-ahead, bracket matching and "expect this token" checks.
    Moving the cursor past the last token is allowed; ``current_token()``
    then returns None.
    """

    def __init__(self, tokens):
        """
        :param tokens: indexable sequence of tokens; the methods below read each token's ``type`` attribute
        """
        self.tokens = tokens
        self.current_token_index = 0

    # parsing tokens
    def current_token(self):
        """
        :return: the token under the cursor, or None when the cursor is past the last token
        """
        if self.current_token_index >= len(self.tokens):
            return None
        return self.token_at_index(self.current_token_index)

    def advance_token(self, amount=1):
        """
        Move the cursor forward by ``amount`` tokens (no bounds check is performed)
        """
        self.current_token_index += amount

    def advance_to_token_at_index(self, token_index):
        """
        Move the cursor to the absolute position ``token_index`` (no bounds check is performed)
        """
        self.current_token_index = token_index

    def token_at_index(self, index):
        """
        :return: the token stored at ``index``

        Asserts that ``index`` is below the number of tokens.
        Negative indices are not rejected and follow normal list indexing.
        """
        assert index < len(self.tokens)
        return self.tokens[index]

    def next_token(self, next_amount=1):
        """
        :return: the token ``next_amount`` positions after the cursor (look-ahead, cursor is not moved)
        """
        return self.token_at_index(self.current_token_index + next_amount)

    def find_matching(self, starting_index=None):
        """
        :return: the index of the token that matches the current token
        :param starting_index (optional) - the index of the token we want to match

        for example, if current token is {
        it returns the index of the matching }

        Raises BFSemanticError when the token to match is not LBRACE, LBRACK or LPAREN,
        and BFSyntaxError when the token list ends before the bracket is closed.
        """
        if starting_index is None:
            starting_index = self.current_token_index

        tokens = self.tokens
        token_to_match = tokens[starting_index]
        if token_to_match.type == Token.LBRACE:
            inc = Token.LBRACE
            dec = Token.RBRACE
        elif token_to_match.type == Token.LBRACK:
            inc = Token.LBRACK
            dec = Token.RBRACK
        elif token_to_match.type == Token.LPAREN:
            inc = Token.LPAREN
            dec = Token.RPAREN
        else:
            raise BFSemanticError("No support for matching %s" % str(token_to_match))

        # Track nesting depth: the opener at starting_index brings it to 1,
        # and the token that brings it back to 0 is the matching closer.
        cnt = 0
        for i in range(starting_index, len(tokens)):
            if tokens[i].type == inc:
                cnt += 1
            elif tokens[i].type == dec:
                cnt -= 1

            if cnt == 0:
                return i

        raise BFSyntaxError("Did not find matching %s for %s" % (dec, str(token_to_match)))

    def _describe_token_at(self, index):
        # Text used in error messages for the token at `index`; positions outside
        # the token list are described in words instead of being indexed.
        if index < 0:
            return "start of input"
        if index >= len(self.tokens):
            return "end of input"
        return str(self.tokens[index])

    def _require_current_token(self, expected):
        # Return the current token, or raise BFSyntaxError when the input has ended.
        token = self.current_token()
        if token is None:
            raise BFSyntaxError("Unexpected end of input (expected %s)" % expected)
        return token

    def check_next_tokens_are(self, tokens_list, starting_index=None):
        """
        Verify that the tokens following ``starting_index`` have exactly the types in ``tokens_list``

        :param tokens_list: expected token types, in order
        :param starting_index (optional) - index of the token the expected ones must follow
                              (defaults to the cursor; may be -1 to check from the first token)

        Returns None on success and raises BFSyntaxError otherwise.
        """
        if starting_index is None:
            starting_index = self.current_token_index

        # used for "assertion" and print a nice message to the user
        if starting_index + len(tokens_list) >= len(self.tokens):
            # starting_index may be -1 or past the end here, so it must not be
            # used to index self.tokens directly while building the message.
            raise BFSyntaxError(
                "Expected %s after %s" % (str(tokens_list), self._describe_token_at(starting_index))
            )

        for i, expected_type in enumerate(tokens_list):
            if self.tokens[starting_index + 1 + i].type != expected_type:
                # Clamp the slice start: a negative start would be interpreted
                # relative to the end of the list and report the wrong tokens.
                seen_so_far = self.tokens[max(starting_index, 0) : starting_index + 1 + i]
                raise BFSyntaxError(
                    "Expected %s after %s" % (str(expected_type), [str(t) for t in seen_so_far])
                )

    def check_next_token_is(self, token, starting_index=None):
        """
        Single-token form of check_next_tokens_are
        """
        self.check_next_tokens_are([token], starting_index=starting_index)

    def check_current_tokens_are(self, tokens_list):
        """
        Verify that the tokens starting AT the cursor have the types in ``tokens_list``

        Implemented by checking the tokens that follow the position just before the cursor.
        """
        self.check_next_tokens_are(tokens_list, starting_index=self.current_token_index - 1)

    def check_current_token_is(self, token):
        """
        Single-token form of check_current_tokens_are
        """
        self.check_current_tokens_are([token])

    def compile_array_initialization_list(self):
        """
        Parse an array initializer starting at the cursor and move the cursor past it

        {1, 2, 3, ...} or {array_initialization_list, array_initialization_list, ...} or string

        :return: a list (of list of list ....) of literal tokens; a STRING token is
                 expanded into one NUM token per character holding str(ord(char))

        Raises BFSyntaxError on malformed input, including input that ends
        before the initializer is closed.
        """
        list_tokens = []

        first_token = self._require_current_token("array initialization list")
        if first_token.type == Token.STRING:
            line, column = first_token.line, first_token.column
            for char in first_token.data:
                list_tokens.append(Token(Token.NUM, line, column, str(ord(char))))

            self.advance_token()  # point to after STRING
            return list_tokens

        # caller precondition: anything that is not a STRING must start with LBRACE
        assert first_token.type == Token.LBRACE
        self.advance_token()  # skip to after LBRACE

        while True:
            element = self._require_current_token("literal, LBRACE ({) or RBRACE (})")
            if not (is_token_literal(element) or element.type == Token.LBRACE):
                break

            if element.type == Token.LBRACE:  # list of (literals | list)
                list_tokens.append(self.compile_array_initialization_list())
            else:  # literal
                list_tokens.append(element)
                self.advance_token()  # skip literal

            separator = self._require_current_token("comma (,) or RBRACE (})")
            if separator.type not in [Token.COMMA, Token.RBRACE]:
                raise BFSyntaxError("Unexpected %s (expected comma (,) or RBRACE (}))" % separator)

            if separator.type == Token.COMMA:
                self.advance_token()  # skip comma
            # a comma directly followed by RBRACE (trailing comma) also ends the list
            if self._require_current_token("literal, LBRACE ({) or RBRACE (})").type == Token.RBRACE:
                break

        self.check_current_token_is(Token.RBRACE)
        self.advance_token()  # skip RBRACE
        return list_tokens
|