From c265215bc6f8a49d47f5bfa29962601302c0c2df Mon Sep 17 00:00:00 2001 From: Kyle Gunger Date: Fri, 4 Aug 2023 00:30:58 -0400 Subject: Add tests to tokenizer --- tnslc/simple.tnsl | 52 +++++++++--------- tnslc/tokenizer.tnsl | 146 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 130 insertions(+), 68 deletions(-) diff --git a/tnslc/simple.tnsl b/tnslc/simple.tnsl index c0c1821..90ee758 100644 --- a/tnslc/simple.tnsl +++ b/tnslc/simple.tnsl @@ -1,34 +1,36 @@ -# testing file to try to reproduce bugs as best as possible +# No longer simple +:include "c_wrap.tnsl" -# 8x4 bytes -struct AddressError { - uint a, b, c, d +enum LOL [int] { + A = 1, + B = 2, + C = 4 } -/; method AddressError - /; if_check [bool] - /; if (self.a < 0 && self.b !< self.d / 4) - return true - ;; else if (self.c == 0) - return false - ;/ - return self.d != 1 - ;/ +struct Lolbert { + int a, b, c, + + ~uint8 stuff +} + +/; lolbert1 (~Lolbert l, uint8 check) [bool] + return l`.a == check ;/ -/; main (int argc, ~~uint argv) [int] - # On windows, the first two arguments are passed in RCX and RDX, so we need to - # update their positions here or else tnsl will have garbage values in r8 and r9 - asm "mov r8, rcx" - asm "mov r9, rdx" - AddressError ae - ae.a = ae.b = ae.c = ae.d = 0 +/; main [int] + Lolbert lol + lol.a = 1 + lol.b = 4 + lol.c = 3 + lol.stuff = _alloc(2) + lol.stuff{0} = 2 + lol.stuff{1} = 1 - /; if (ae.if_check()) - return 1 + /; if (lolbert1(~lol, lol.stuff{1}) && lolbert1(~lol, lol.stuff{0})) + _delete(lol.stuff) + return lol.a + lol.b ;/ - + _delete(lol.stuff) return 0 -;/ - +;/ \ No newline at end of file diff --git a/tnslc/tokenizer.tnsl b/tnslc/tokenizer.tnsl index ae52468..3de0182 100644 --- a/tnslc/tokenizer.tnsl +++ b/tnslc/tokenizer.tnsl @@ -55,7 +55,6 @@ struct Token { # Remove the last character from this token /; pop int ln = cstr_len(self.data) - self.data = _realloc(self.data, ln) self.data{ln - 1} = 0 ;/ @@ -72,7 +71,7 @@ struct Token { # Delete the memory associated with this token /; _del - _delete(self.data) + _realloc(self.data, 0) ;/ # length of the string that this token encodes @@ -106,13 +105,10 @@ struct Token { # Returns true if the token is a valid reserved token /; tok_reserved (Token tok) [bool] - log_one_nl('i') /; if (tok._len() == 1) return is_reserved(tok.data{0}) ;; else if (tok._len() == 2) - bool a = contains_char(~MULTI_PARENS, tok.data{0}) - bool b = contains_char(~MULTI_PARENS, tok.data{1}) - return a && b + return contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}) ;/ return in_csv(~CSV_AUGMENTS, tok.data) @@ -125,25 +121,22 @@ struct Token { ;/ /; get_tok_type(Token tok) [uint] - log_one_nl('h') /; if (tok_reserved(tok) == true) /; if (tok._len() > 1) - bool a = contains_char(~MULTI_PARENS, tok.data{0}) - bool b = contains_char(~MULTI_PARENS, tok.data{1}) - /; if (a && b) + /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1})) return TOKEN_TYPE.DELIMITER ;/ return TOKEN_TYPE.AUGMENT - ;; else if (contains_char(~PARENS, tok.data{0})) + ;; else if (contains_char(~PARENS, tok.data{0}) == true) return TOKEN_TYPE.DELIMITER - ;; else if (contains_char(~SEPS, tok.data{0})) + ;; else if (contains_char(~SEPS, tok.data{0}) == true) return TOKEN_TYPE.SEPARATOR - ;; else if (contains_char(~AUGMENTS, tok.data{0})) + ;; else if (contains_char(~AUGMENTS, tok.data{0}) == true) return TOKEN_TYPE.AUGMENT ;/ - ;; else if (in_csv(~CSV_KEYWORDS, tok.data)) + ;; else if (in_csv(~CSV_KEYWORDS, tok.data) == true) return TOKEN_TYPE.KEYWORD - ;; else if (in_csv(~CSV_KEYTYPES, tok.data)) + ;; else if (in_csv(~CSV_KEYTYPES, tok.data) == true) return TOKEN_TYPE.KEYTYPE ;; else if (tok_literal(tok) == true) return TOKEN_TYPE.LITERAL @@ -153,27 +146,14 @@ struct Token { ;/ -/; break_token(Token tok, uint8 c) [bool] - log_one('g') - log_one(' ') - log_one_nl(c) +/; break_token(~Token tok, uint8 c) [bool] # return true - uint type_before = get_tok_type(tok) - tok.append(c) - uint type_after = get_tok_type(tok) - tok.pop() - log_one('g') - bool a = type_before !== TOKEN_TYPE.DEFWORD && type_before != TOKEN_TYPE.KEYTYPE && type_before != TOKEN_TYPE.KEYWORD && type_after == TOKEN_TYPE.DEFWORD - log_one(' ') - log_one('[') - log_one(a) - log_one(']') - bool b = type_after !== TOKEN_TYPE.LITERAL && is_whitespace(c) == true - log_one(' ') - log_one('[') - log_one(b) - log_one_nl(']') - return a || b + uint type_before = get_tok_type(tok`) + tok`.append(c) + uint type_after = get_tok_type(tok`) + tok`.pop() + bool a = true + return a ;/ /; tokenize_file (~void file_in, file_out) @@ -191,7 +171,6 @@ struct Token { break ;/ - log_one_nl('b') /; if (buf == '#') /; loop (_read_byte(file_in, ~buf, ~read_count)) @@ -200,32 +179,26 @@ struct Token { ;/ ;/ - ;; else if (break_token(tmp, buf) == true) - log_one_nl('c') + ;; else if (break_token(~tmp, buf) == true) /; if (tmp._len() > 0) - log_one_nl('d') print_token(tmp, file_out) ;/ tmp._del() tmp.start() /; if (is_whitespace(buf) == false) - log_one_nl('e') tmp.append(buf) ;; else if (buf == WHITESPACE{2}) - log_one_nl('f') tmp.append(WHITESPACE{2}) print_token(tmp, file_out) tmp._del() tmp.start() ;/ - log_one_nl('c') ;; else tmp.append(buf) ;/ - log_one_nl('b') read_count = 0 ;/ @@ -235,3 +208,90 @@ struct Token { tmp._del() ;/ + +{}uint8 w_SEP = "SEPARATOR\n\0" +{}uint8 w_DEL = "DELIMITER\n\0" +{}uint8 w_AUG = "AUGMENT\n\0" +{}uint8 w_KTP = "KEYTYPE\n\0" +{}uint8 w_KWD = "KEYWORD\n\0" +{}uint8 w_LIT = "LITERAL\n\0" +{}uint8 w_DEF = "DEFWORD\n\0" + +/; print_tok_type(uint tt) + + ~uint8 ptr = ~w_DEF{0} + + /; if (tt == TOKEN_TYPE.SEPARATOR) + ptr = ~w_SEP{0} + ;; else if (tt == TOKEN_TYPE.DELIMITER) + ptr = ~w_DEL{0} + ;; else if (tt == TOKEN_TYPE.AUGMENT) + ptr = ~w_AUG{0} + ;; else if (tt == TOKEN_TYPE.KEYTYPE) + ptr = ~w_KTP{0} + ;; else if (tt == TOKEN_TYPE.KEYWORD) + ptr = ~w_KWD{0} + ;; else if (tt == TOKEN_TYPE.LITERAL) + ptr = ~w_LIT{0} + ;; else if (tt == TOKEN_TYPE.DEFWORD) + ptr = ~w_DEF{0} + ;/ + + _printf(ptr) + +;/ + +{}uint8 test_multi = "/;\0" +{}uint8 test_paren = "(\0" +{}uint8 test_seps = ",\0" +{}uint8 test_aug = ".\0" +{}uint8 test_maug = "++\0" +{}uint8 test_mkw = "if\0" +{}uint8 test_mkt = "bool\0" +{}uint8 test_def = "main\0" +{}uint8 space = " \0" + +/; tests + Token tk + + # Delimiter + tk.data = ~test_multi{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_paren{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_seps{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_aug{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_maug{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_mkw{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_mkt{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) + + tk.data = ~test_def{0} + _printf(tk.data) + _printf(~space{0}) + print_tok_type(get_tok_type(tk)) +;/ -- cgit v1.2.3