reasoning-gym/reasoning_gym/code/contrib/bfit/Compiler/Parser.py
Andreas Koepf ebb88e6c6a lint
2025-01-30 22:55:04 +01:00

133 lines
4.9 KiB
Python

from .Exceptions import BFSemanticError, BFSyntaxError
from .General import is_token_literal
from .Token import Token
class Parser:
"""
Used to easily iterate tokens
"""
def __init__(self, tokens):
self.tokens = tokens
self.current_token_index = 0
# parsing tokens
def current_token(self):
if self.current_token_index >= len(self.tokens):
return None
else:
return self.token_at_index(self.current_token_index)
def advance_token(self, amount=1):
self.current_token_index += amount
def advance_to_token_at_index(self, token_index):
self.current_token_index = token_index
def token_at_index(self, index):
assert index < len(self.tokens)
return self.tokens[index]
def next_token(self, next_amount=1):
return self.token_at_index(self.current_token_index + next_amount)
def find_matching(self, starting_index=None):
    """
    :return: the index of the token that matches the token at *starting_index*
    :param starting_index: (optional) index of the opening token to match;
        defaults to the current token
    For example, if the token is {, returns the index of the matching }.
    Raises BFSemanticError for non-bracket tokens and BFSyntaxError when no
    matching closer exists.
    """
    if starting_index is None:
        starting_index = self.current_token_index
    opener = self.tokens[starting_index]
    # each supported opening bracket mapped to its closing counterpart
    pairs = {
        Token.LBRACE: Token.RBRACE,
        Token.LBRACK: Token.RBRACK,
        Token.LPAREN: Token.RPAREN,
    }
    if opener.type not in pairs:
        raise BFSemanticError("No support for matching %s" % str(opener))
    open_type = opener.type
    close_type = pairs[open_type]
    depth = 0
    # scan forward, tracking nesting depth; depth returns to 0 on the match
    for index in range(starting_index, len(self.tokens)):
        token_type = self.tokens[index].type
        if token_type == open_type:
            depth += 1
        elif token_type == close_type:
            depth -= 1
            if depth == 0:
                return index
    raise BFSyntaxError("Did not find matching %s for %s" % (close_type, str(opener)))
def check_next_tokens_are(self, tokens_list, starting_index=None):
if starting_index is None:
starting_index = self.current_token_index
# used for "assertion" and print a nice message to the user
if starting_index + len(tokens_list) >= len(self.tokens):
raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
for i in range(0, len(tokens_list)):
if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
raise BFSyntaxError(
"Expected %s after %s"
% (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index : starting_index + 1 + i]])
)
def check_next_token_is(self, token, starting_index=None):
self.check_next_tokens_are([token], starting_index=starting_index)
def check_current_tokens_are(self, tokens_list):
self.check_next_tokens_are(tokens_list, starting_index=self.current_token_index - 1)
def check_current_token_is(self, token):
self.check_current_tokens_are([token])
def compile_array_initialization_list(self):
    """
    Parse an array initialization: {1, 2, 3, ...}, a nested
    {list, list, ...}, or a string literal.
    Returns a (possibly nested) list of literal tokens (NUM, CHAR, TRUE, FALSE);
    a string is expanded into one NUM token per character.
    The cursor is left on the token just past the initializer.
    """
    token = self.current_token()
    # string literal: expand each character into a NUM token at the same position
    if token.type == Token.STRING:
        line, column = token.line, token.column
        result = [Token(Token.NUM, line, column, str(ord(char))) for char in token.data]
        self.advance_token()  # consume the STRING
        return result
    assert token.type == Token.LBRACE
    self.advance_token()  # consume the LBRACE
    elements = []
    while True:
        current = self.current_token()
        # stop at anything that cannot start an element (literal or nested list)
        if not (is_token_literal(current) or current.type == Token.LBRACE):
            break
        if current.type == Token.LBRACE:
            # nested initialization list: recurse
            elements.append(self.compile_array_initialization_list())
        else:
            elements.append(current)
            self.advance_token()  # consume the literal
        if self.current_token().type not in [Token.COMMA, Token.RBRACE]:
            raise BFSyntaxError("Unexpected %s (expected comma (,) or RBRACE (}))" % self.current_token())
        if self.current_token().type == Token.COMMA:
            self.advance_token()  # consume the comma
        if self.current_token().type == Token.RBRACE:
            break  # allows a trailing comma before the closing brace
    self.check_current_token_is(Token.RBRACE)
    self.advance_token()  # consume the RBRACE
    return elements