reasoning-gym/reasoning_gym/code/contrib/bfit/Compiler/Parser.py
Andreas Koepf ebb88e6c6a lint
2025-01-30 22:55:04 +01:00

133 lines
4.9 KiB
Python

from .Exceptions import BFSemanticError, BFSyntaxError
from .General import is_token_literal
from .Token import Token
class Parser:
"""
Used to easily iterate tokens
"""
def __init__(self, tokens):
self.tokens = tokens
self.current_token_index = 0
# parsing tokens
def current_token(self):
if self.current_token_index >= len(self.tokens):
return None
else:
return self.token_at_index(self.current_token_index)
def advance_token(self, amount=1):
self.current_token_index += amount
def advance_to_token_at_index(self, token_index):
self.current_token_index = token_index
def token_at_index(self, index):
assert index < len(self.tokens)
return self.tokens[index]
def next_token(self, next_amount=1):
return self.token_at_index(self.current_token_index + next_amount)
def find_matching(self, starting_index=None):
    """
    :return: the index of the token that matches the token at *starting_index*
    :param starting_index: (optional) index of the opening token to match;
        defaults to the current token
    For example, if the token is {, returns the index of the matching }.
    Raises BFSemanticError for non-bracket tokens and BFSyntaxError when no
    matching closer exists.
    """
    if starting_index is None:
        starting_index = self.current_token_index
    opener = self.tokens[starting_index]
    # each supported opening bracket mapped to its closing counterpart
    pairs = {
        Token.LBRACE: Token.RBRACE,
        Token.LBRACK: Token.RBRACK,
        Token.LPAREN: Token.RPAREN,
    }
    if opener.type not in pairs:
        raise BFSemanticError("No support for matching %s" % str(opener))
    open_type = opener.type
    close_type = pairs[open_type]
    depth = 0
    # scan forward, tracking nesting depth; depth returns to 0 on the match
    for index in range(starting_index, len(self.tokens)):
        token_type = self.tokens[index].type
        if token_type == open_type:
            depth += 1
        elif token_type == close_type:
            depth -= 1
            if depth == 0:
                return index
    raise BFSyntaxError("Did not find matching %s for %s" % (close_type, str(opener)))
def check_next_tokens_are(self, tokens_list, starting_index=None):
if starting_index is None:
starting_index = self.current_token_index
# used for "assertion" and print a nice message to the user
if starting_index + len(tokens_list) >= len(self.tokens):
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
for i in range(0, len(tokens_list)):
if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
raise BFSyntaxError(
"Expected %s after %s"
% (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index : starting_index + 1 + i]])
)
def check_next_token_is(self, token, starting_index=None):
self.check_next_tokens_are([token], starting_index=starting_index)
def check_current_tokens_are(self, tokens_list):
self.check_next_tokens_are(tokens_list, starting_index=self.current_token_index - 1)
def check_current_token_is(self, token):
self.check_current_tokens_are([token])
def compile_array_initialization_list(self):
    """
    Parse an array initialization: {1, 2, 3, ...}, a nested
    {list, list, ...}, or a string literal.
    Returns a (possibly nested) list of literal tokens (NUM, CHAR, TRUE, FALSE);
    a string is expanded into one NUM token per character.
    The cursor is left on the token just past the initializer.
    """
    token = self.current_token()
    # string literal: expand each character into a NUM token at the same position
    if token.type == Token.STRING:
        line, column = token.line, token.column
        result = [Token(Token.NUM, line, column, str(ord(char))) for char in token.data]
        self.advance_token()  # consume the STRING
        return result
    assert token.type == Token.LBRACE
    self.advance_token()  # consume the LBRACE
    elements = []
    while True:
        current = self.current_token()
        # stop at anything that cannot start an element (literal or nested list)
        if not (is_token_literal(current) or current.type == Token.LBRACE):
            break
        if current.type == Token.LBRACE:
            # nested initialization list: recurse
            elements.append(self.compile_array_initialization_list())
        else:
            elements.append(current)
            self.advance_token()  # consume the literal
        if self.current_token().type not in [Token.COMMA, Token.RBRACE]:
            raise BFSyntaxError("Unexpected %s (expected comma (,) or RBRACE (}))" % self.current_token())
        if self.current_token().type == Token.COMMA:
            self.advance_token()  # consume the comma
        if self.current_token().type == Token.RBRACE:
            break  # allows a trailing comma before the closing brace
    self.check_current_token_is(Token.RBRACE)
    self.advance_token()  # consume the RBRACE
    return elements