mirror of https://github.com/open-thought/reasoning-gym.git
synced 2026-04-19 12:58:07 +00:00

add contrib

This commit is contained in:
parent bf053e2266
commit 0688cadf59
20 changed files with 3971 additions and 0 deletions
129  reasoning_gym/code/contrib/bfit/Compiler/Parser.py  (Normal file)

@@ -0,0 +1,129 @@
from .Exceptions import BFSyntaxError, BFSemanticError
from .Token import Token
from .General import is_token_literal


class Parser:
    """
    Used to easily iterate tokens
    """
    def __init__(self, tokens):
        self.tokens = tokens
        self.current_token_index = 0

    # parsing tokens
    def current_token(self):
        if self.current_token_index >= len(self.tokens):
            return None
        else:
            return self.token_at_index(self.current_token_index)

    def advance_token(self, amount=1):
        self.current_token_index += amount

    def advance_to_token_at_index(self, token_index):
        self.current_token_index = token_index

    def token_at_index(self, index):
        assert index < len(self.tokens)
        return self.tokens[index]

    def next_token(self, next_amount=1):
        return self.token_at_index(self.current_token_index + next_amount)

    def find_matching(self, starting_index=None):
        """
        :return: the index of the token that matches the current token
        :param starting_index: (optional) the index of the token we want to match

        for example, if current token is {
        it returns the index of the matching }
        """
        if starting_index is None:
            starting_index = self.current_token_index

        tokens = self.tokens
        token_to_match = tokens[starting_index]
        if token_to_match.type == Token.LBRACE:
            inc = Token.LBRACE
            dec = Token.RBRACE
        elif token_to_match.type == Token.LBRACK:
            inc = Token.LBRACK
            dec = Token.RBRACK
        elif token_to_match.type == Token.LPAREN:
            inc = Token.LPAREN
            dec = Token.RPAREN
        else:
            raise BFSemanticError("No support for matching %s" % str(token_to_match))

        i = starting_index
        cnt = 0
        while i < len(tokens):
            if tokens[i].type == inc:
                cnt += 1
            elif tokens[i].type == dec:
                cnt -= 1

            if cnt == 0:
                return i

            i += 1

        raise BFSyntaxError("Did not find matching %s for %s" % (dec, str(token_to_match)))

    def check_next_tokens_are(self, tokens_list, starting_index=None):
        if starting_index is None:
            starting_index = self.current_token_index

        # used for "assertion" and print a nice message to the user
        if starting_index + len(tokens_list) >= len(self.tokens):
            raise BFSyntaxError("Expected %s after %s" % (str(tokens_list), str(self.tokens[starting_index])))
        for i in range(0, len(tokens_list)):
            if self.tokens[starting_index + 1 + i].type != tokens_list[i]:
                raise BFSyntaxError("Expected %s after %s" % (str(tokens_list[i]), [str(t) for t in self.tokens[starting_index: starting_index+1+i]]))

    def check_next_token_is(self, token, starting_index=None):
        self.check_next_tokens_are([token], starting_index=starting_index)

    def check_current_tokens_are(self, tokens_list):
        self.check_next_tokens_are(tokens_list, starting_index=self.current_token_index - 1)

    def check_current_token_is(self, token):
        self.check_current_tokens_are([token])

    def compile_array_initialization_list(self):
        # {1, 2, 3, ...} or {array_initialization_list, array_initialization_list, array_initialization_list, ...} or string
        # parses the definition and returns a list (of list of list ....) of literal tokens (NUM, CHAR, TRUE, FALSE)

        list_tokens = []

        if self.current_token().type == Token.STRING:
            string_token = self.current_token()
            line, column = string_token.line, string_token.column
            for char in string_token.data:
                list_tokens.append(Token(Token.NUM, line, column, str(ord(char))))

            self.advance_token()  # point to after STRING
            return list_tokens

        assert self.current_token().type == Token.LBRACE
        self.advance_token()  # skip to after LBRACE

        while is_token_literal(self.current_token()) or self.current_token().type == Token.LBRACE:
            if self.current_token().type == Token.LBRACE:  # list of (literals | list)
                list_tokens.append(self.compile_array_initialization_list())
            else:  # literal
                list_tokens.append(self.current_token())
                self.advance_token()  # skip literal

            if self.current_token().type not in [Token.COMMA, Token.RBRACE]:
                raise BFSyntaxError("Unexpected %s (expected comma (,) or RBRACE (}))" % self.current_token())

            if self.current_token().type == Token.COMMA:
                self.advance_token()  # skip comma
            if self.current_token().type == Token.RBRACE:
                break

        self.check_current_token_is(Token.RBRACE)
        self.advance_token()  # skip RBRACE
        return list_tokens
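
For context, here is a minimal, self-contained sketch of the bracket-matching idea used by Parser.find_matching above. FakeToken is a hypothetical stand-in for the real Compiler.Token.Token class (assumed only for illustration); the counting scheme mirrors the method: +1 on each opening token, -1 on each closing token, and the match is the index where the count returns to zero.

# Illustrative sketch only: FakeToken is a hypothetical stand-in for the real Token class.
class FakeToken:
    LPAREN, RPAREN, NUM = "LPAREN", "RPAREN", "NUM"

    def __init__(self, type_, data=""):
        self.type = type_
        self.data = data


def find_matching(tokens, starting_index=0):
    # Same counting scheme as Parser.find_matching: +1 on the opening
    # token type, -1 on the closing one; the answer is the index where
    # the running count drops back to zero.
    inc, dec = FakeToken.LPAREN, FakeToken.RPAREN
    cnt = 0
    for i in range(starting_index, len(tokens)):
        if tokens[i].type == inc:
            cnt += 1
        elif tokens[i].type == dec:
            cnt -= 1
        if cnt == 0:
            return i
    raise ValueError("No matching %s" % dec)


# ( ( 1 ) )  ->  the "(" at index 0 matches the ")" at index 4
tokens = [FakeToken(FakeToken.LPAREN), FakeToken(FakeToken.LPAREN),
          FakeToken(FakeToken.NUM, "1"), FakeToken(FakeToken.RPAREN),
          FakeToken(FakeToken.RPAREN)]
print(find_matching(tokens))  # prints 4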