Mirror of https://github.com/open-thought/reasoning-gym.git, synced 2026-04-19 12:58:07 +00:00
lint

commit ebb88e6c6a, parent 21c47db6c1
24 changed files with 1215 additions and 814 deletions
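The diff below is a mechanical lint pass: string literals move to double quotes, regex patterns gain raw-string prefixes, long literals are exploded one element per line, and operators get PEP 8 spacing. The raw-string change is the one with real semantic weight: an un-prefixed escape such as '\s' is an invalid string escape that CPython has warned about since 3.6 (upgraded to SyntaxWarning in 3.12). A minimal illustration, not taken from the diff:

    import re

    old_style = re.compile('\s+')   # '\s' is an invalid string escape; CPython warns
    new_style = re.compile(r"\s+")  # raw string: the backslash reaches re verbatim

    # Python leaves unrecognized escapes untouched, so both compile to the same
    # pattern today -- but only the raw form is guaranteed to stay that way.
    assert old_style.pattern == new_style.pattern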
@@ -1,6 +1,7 @@
 import re
-from .Token import Token
+
+from .Optimizer import optimize
+from .Token import Token


 class LexicalErrorException(Exception):
@@ -14,64 +15,59 @@ def analyze(text):
     """

     rules = [
-        ('\s+', Token.WHITESPACE),
-        ('void', Token.VOID),
-        ('int', Token.INT),
-        ('bool', Token.INT), # treat bool as int
-        ('char', Token.INT), # treat char as int
-
-        ('true', Token.TRUE),
-        ('false', Token.FALSE),
-        ('&&', Token.AND),
-        ('\|\|', Token.OR),
-        ('\!', Token.NOT),
-        ('return', Token.RETURN),
-        ('if', Token.IF),
-        ('else', Token.ELSE),
-        ('while', Token.WHILE),
-        ('for', Token.FOR),
-        ('do', Token.DO),
-        ('print', Token.PRINT),
-        ('switch', Token.SWITCH),
-        ('case', Token.CASE),
-        ('default', Token.DEFAULT),
-        ('break', Token.BREAK),
-        ('continue', Token.CONTINUE), # todo
-        (':', Token.COLON),
-        (';', Token.SEMICOLON),
-        (',', Token.COMMA),
-
-        ('\(', Token.LPAREN),
-        ('\)', Token.RPAREN),
-        ('\{', Token.LBRACE),
-        ('\}', Token.RBRACE),
-        ('\[', Token.LBRACK),
-        ('\]', Token.RBRACK),
-        ('=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=', Token.ASSIGN),
-        ('\?', Token.TERNARY),
-
-        ('<=|>=|==|!=|<|>', Token.RELOP),
-        ('\+\+', Token.INCREMENT),
-        ('--', Token.DECREMENT),
-        ('\+|-|\*|/|%', Token.BINOP),
-        ('\*\*|//|%%', Token.UNARY_MULTIPLICATIVE),
-
-        ('<<|>>', Token.BITWISE_SHIFT),
-        ('~', Token.BITWISE_NOT),
-        ('&', Token.BITWISE_AND),
-        ('\|', Token.BITWISE_OR),
-        ('\^', Token.BITWISE_XOR),
-
-        ('([a-zA-Z_][a-zA-Z0-9_]*)', Token.ID),
-        ('(\d+)', Token.NUM),
-        ('(0x[A-Fa-f\d]+)', Token.NUM), # hexadecimal number
-        ('(0o[0-7]+)', Token.NUM), # octal number
-        ('(0b[01]+)', Token.NUM), # binary number
-        (r'\"(\\\"|[^"])*"', Token.STRING),
-        (r'\'(\\\'|(\\)?[^\'])\'', Token.CHAR),
-        ('//.*(\\n|$)', Token.COMMENT),
-        (r'/\*[\s\S]*?\*/', Token.COMMENT), # multiline comments
-        ('.', Token.UNIDENTIFIED)
+        (r"\s+", Token.WHITESPACE),
+        ("void", Token.VOID),
+        ("int", Token.INT),
+        ("bool", Token.INT),  # treat bool as int
+        ("char", Token.INT),  # treat char as int
+        ("true", Token.TRUE),
+        ("false", Token.FALSE),
+        ("&&", Token.AND),
+        (r"\|\|", Token.OR),
+        (r"\!", Token.NOT),
+        ("return", Token.RETURN),
+        ("if", Token.IF),
+        ("else", Token.ELSE),
+        ("while", Token.WHILE),
+        ("for", Token.FOR),
+        ("do", Token.DO),
+        ("print", Token.PRINT),
+        ("switch", Token.SWITCH),
+        ("case", Token.CASE),
+        ("default", Token.DEFAULT),
+        ("break", Token.BREAK),
+        ("continue", Token.CONTINUE),  # todo
+        (":", Token.COLON),
+        (";", Token.SEMICOLON),
+        (",", Token.COMMA),
+        (r"\(", Token.LPAREN),
+        (r"\)", Token.RPAREN),
+        (r"\{", Token.LBRACE),
+        (r"\}", Token.RBRACE),
+        (r"\[", Token.LBRACK),
+        (r"\]", Token.RBRACK),
+        (r"=|\+=|-=|\*=|/=|%=|<<=|>>=|&=|\|=|\^=", Token.ASSIGN),
+        (r"\?", Token.TERNARY),
+        (r"<=|>=|==|!=|<|>", Token.RELOP),
+        (r"\+\+", Token.INCREMENT),
+        ("--", Token.DECREMENT),
+        (r"\+|-|\*|/|%", Token.BINOP),
+        (r"\*\*|//|%%", Token.UNARY_MULTIPLICATIVE),
+        ("<<|>>", Token.BITWISE_SHIFT),
+        ("~", Token.BITWISE_NOT),
+        ("&", Token.BITWISE_AND),
+        (r"\|", Token.BITWISE_OR),
+        (r"\^", Token.BITWISE_XOR),
+        ("([a-zA-Z_][a-zA-Z0-9_]*)", Token.ID),
+        (r"(\d+)", Token.NUM),
+        (r"(0x[A-Fa-f\d]+)", Token.NUM),  # hexadecimal number
+        ("(0o[0-7]+)", Token.NUM),  # octal number
+        ("(0b[01]+)", Token.NUM),  # binary number
+        (r'\"(\\\"|[^"])*"', Token.STRING),
+        (r"\'(\\\'|(\\)?[^\'])\'", Token.CHAR),
+        ("//.*(\\n|$)", Token.COMMENT),
+        (r"/\*[\s\S]*?\*/", Token.COMMENT),  # multiline comments
+        (".", Token.UNIDENTIFIED),
     ]

     rules = [(re.compile(r), t) for r, t in rules]
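The hunks that follow reference longest_match and matched_token from a scanning loop this diff does not touch. For orientation, here is a minimal sketch of the longest-match strategy those context lines imply -- every name other than longest_match and matched_token is an illustrative assumption, not the file's actual code:

    def scan(text, rules):
        # Sketch: longest-match scan over (compiled_regex, token_type) pairs.
        i = 0
        while i < len(text):
            longest_match, matched_token = None, None
            for regex, token_type in rules:
                m = regex.match(text, i)
                if m and (longest_match is None or m.end() > longest_match.end()):
                    longest_match, matched_token = m, token_type
            # the final (".", Token.UNIDENTIFIED) rule guarantees progress
            yield matched_token, longest_match
            i = longest_match.end()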
@@ -79,7 +75,7 @@ def analyze(text):
     tokens = []

     # create a mapping of [line number] to [offset of that line from the beginning of the text]
-    newline = re.compile('\n')
+    newline = re.compile("\n")
     lines = [0] + [m.end() for m in re.finditer(newline, text)]

     i = 0
@@ -99,12 +95,12 @@ def analyze(text):

         # calculate line and column
         line, column = None, None
-        for line_idx in range(len(lines)-1):
-            if lines[line_idx] <= longest_match.start() < lines[line_idx+1]:
-                line, column = line_idx+1, (longest_match.start() - lines[line_idx])+1 # humans count from 1 :)
+        for line_idx in range(len(lines) - 1):
+            if lines[line_idx] <= longest_match.start() < lines[line_idx + 1]:
+                line, column = line_idx + 1, (longest_match.start() - lines[line_idx]) + 1  # humans count from 1 :)
                 break
         if not line:
-            line, column = len(lines), (longest_match.start() - lines[-1])+1
+            line, column = len(lines), (longest_match.start() - lines[-1]) + 1

         if matched_token in [Token.COMMENT, Token.WHITESPACE]:
             pass  # do nothing
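The lines table above stores the offset at which each source line begins, so a match offset maps to (line, column) by locating its half-open interval. A small standalone check of that arithmetic:

    import re

    text = "int x;\nx = 1;\n"
    lines = [0] + [m.end() for m in re.finditer("\n", text)]
    # lines == [0, 7, 14]: offsets where lines 1, 2, 3 begin

    offset = 9  # the '=' inside "x = 1;"
    line = max(idx for idx, start in enumerate(lines) if start <= offset) + 1
    column = offset - lines[line - 1] + 1  # humans count from 1
    assert (line, column) == (2, 3)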
@@ -112,8 +108,18 @@ def analyze(text):
             raise LexicalErrorException("Unidentified Character '%s' (line %s column %s)" % (text[i], line, column))
         elif matched_token in [Token.STRING, Token.CHAR]:
             # remove quotes at beginning and end, un-escape characters
-            tokens.append(Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape")))
-        elif matched_token in [Token.NUM, Token.ID, Token.BINOP, Token.RELOP, Token.ASSIGN, Token.UNARY_MULTIPLICATIVE, Token.BITWISE_SHIFT]:
+            tokens.append(
+                Token(matched_token, line, column, longest_match.group()[1:-1].encode("utf8").decode("unicode_escape"))
+            )
+        elif matched_token in [
+            Token.NUM,
+            Token.ID,
+            Token.BINOP,
+            Token.RELOP,
+            Token.ASSIGN,
+            Token.UNARY_MULTIPLICATIVE,
+            Token.BITWISE_SHIFT,
+        ]:
             tokens.append(Token(matched_token, line, column, longest_match.group()))
         else:
             tokens.append(Token(matched_token, line, column))
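The STRING/CHAR branch strips the surrounding quotes and decodes escape sequences through the unicode_escape codec. A standalone check of what that round-trip does:

    raw = '"hello\\nworld"'  # lexeme as matched: quotes plus a literal backslash-n
    value = raw[1:-1].encode("utf8").decode("unicode_escape")
    assert value == "hello\nworld"  # '\' + 'n' decoded to a single newline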
@@ -128,16 +134,40 @@ def tests():
         text = "my international int ; int; pints; international;"
         res = analyze(text)

-        expected = [Token.ID, Token.ID, Token.INT, Token.SEMICOLON, Token.INT, Token.SEMICOLON, Token.ID,
-                    Token.SEMICOLON, Token.ID, Token.SEMICOLON]
+        expected = [
+            Token.ID,
+            Token.ID,
+            Token.INT,
+            Token.SEMICOLON,
+            Token.INT,
+            Token.SEMICOLON,
+            Token.ID,
+            Token.SEMICOLON,
+            Token.ID,
+            Token.SEMICOLON,
+        ]
         assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))

     def test2():
         text = "true !||!false falsek k||y+-a&&x"
         res = analyze(text)

-        expected = [Token.TRUE, Token.NOT, Token.OR, Token.NOT, Token.FALSE, Token.ID, Token.ID, Token.OR, Token.ID,
-                    Token.BINOP, Token.BINOP, Token.ID, Token.AND, Token.ID]
+        expected = [
+            Token.TRUE,
+            Token.NOT,
+            Token.OR,
+            Token.NOT,
+            Token.FALSE,
+            Token.ID,
+            Token.ID,
+            Token.OR,
+            Token.ID,
+            Token.BINOP,
+            Token.BINOP,
+            Token.ID,
+            Token.AND,
+            Token.ID,
+        ]
         assert len(res) == len(expected) and all(res[i].type == expected[i] for i in range(len(res)))

     def test3():
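Note what these two tests pin down: the lexer uses maximal munch, so in "international" the identifier rule's 13-character match beats the 3-character "int" keyword and the word stays a single ID, while the bare "int;" still splits into INT and SEMICOLON; likewise "falsek" in test2 lexes as one ID rather than FALSE followed by a stray "k".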
@@ -166,9 +196,29 @@ def tests():
         # test all arithmetic operations
         text = "(1+2*3/6)+(1%3)*(6-1)"
         tokens = analyze(text)
-        expected = [Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM, Token.BINOP, Token.NUM,
-                    Token.RPAREN, Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN,
-                    Token.BINOP, Token.LPAREN, Token.NUM, Token.BINOP, Token.NUM, Token.RPAREN]
+        expected = [
+            Token.LPAREN,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.RPAREN,
+            Token.BINOP,
+            Token.LPAREN,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.RPAREN,
+            Token.BINOP,
+            Token.LPAREN,
+            Token.NUM,
+            Token.BINOP,
+            Token.NUM,
+            Token.RPAREN,
+        ]
         assert len(tokens) == len(expected) and all(tokens[i].type == expected[i] for i in range(len(tokens)))
         optimize(tokens)
         assert tokens[1].data == "2" and tokens[5].data == "1" and tokens[9].data == "5"
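The post-optimize assertions encode the constant folding expected of optimize: 1 + 2*3/6 = 1 + 1 = 2, 1 % 3 = 1, and 6 - 1 = 5, so the stream collapses to the tokens of "(2)+(1)*(5)" and indices 1, 5, 9 carry the folded literals "2", "1", "5".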
@@ -179,5 +229,5 @@ def tests():
     test3()


-if __name__ == '__main__':
+if __name__ == "__main__":
     tests()
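Putting the pieces together, a hedged usage sketch of analyze as the tests exercise it; the diff never names the module, so both imports here are assumptions:

    from Lexer import analyze  # assumed module name
    from Token import Token    # the file itself uses relative imports (.Token)

    tokens = analyze("int x;\nx = 0x2A;  // the answer")
    # WHITESPACE and COMMENT tokens are dropped; "0x2A" beats r"(\d+)" by longest match
    assert [t.type for t in tokens] == [
        Token.INT, Token.ID, Token.SEMICOLON,
        Token.ID, Token.ASSIGN, Token.NUM, Token.SEMICOLON,
    ]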