From c265215bc6f8a49d47f5bfa29962601302c0c2df Mon Sep 17 00:00:00 2001
From: Kyle Gunger <kgunger12@gmail.com>
Date: Fri, 4 Aug 2023 00:30:58 -0400
Subject: Add tests to tokenizer

---
 tnslc/simple.tnsl    |  52 +++++++++---------
 tnslc/tokenizer.tnsl | 146 ++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 130 insertions(+), 68 deletions(-)

diff --git a/tnslc/simple.tnsl b/tnslc/simple.tnsl
index c0c1821..90ee758 100644
--- a/tnslc/simple.tnsl
+++ b/tnslc/simple.tnsl
@@ -1,34 +1,36 @@
-# testing file to try to reproduce bugs as best as possible
+# No longer simple
+:include "c_wrap.tnsl"
 
-# 8x4 bytes
-struct AddressError {
-    uint a, b, c, d
+enum LOL [int] {  # int-backed enum; the three members are distinct powers of two
+    A = 1,
+    B = 2,
+    C = 4
 }
 
-/; method AddressError
-    /; if_check [bool]
-        /; if (self.a < 0 && self.b !< self.d / 4)
-            return true
-        ;; else if (self.c == 0)
-            return false
-        ;/
-        return self.d != 1
-    ;/
+struct Lolbert {  # Three int fields plus a pointer to uint8 data
+    int a, b, c,
+    # stuff is a uint8 pointer; main assigns it the result of _alloc(2) and indexes elements 0 and 1
+    ~uint8 stuff
+}
+
+/; lolbert1 (~Lolbert l, uint8 check) [bool]  # Returns true when field a of the Lolbert that l points to equals check
+    return l`.a == check  # l` dereferences the pointer; the int field a is compared against the uint8 check
 ;/
 
-/; main (int argc, ~~uint argv) [int]
-    # On windows, the first two arguments are passed in RCX and RDX, so we need to
-    # update their positions here or else tnsl will have garbage values in r8 and r9
-    asm "mov r8, rcx"
-    asm "mov r9, rdx"
 
-    AddressError ae
-    ae.a = ae.b = ae.c = ae.d = 0
+/; main [int]  # Fills a Lolbert, runs lolbert1 against both bytes of lol.stuff; returns lol.a + lol.b if both match, otherwise 0
+    Lolbert lol
+    lol.a = 1
+    lol.b = 4
+    lol.c = 3
+    lol.stuff = _alloc(2)
+    lol.stuff{0} = 2
+    lol.stuff{1} = 1
 
-    /; if (ae.if_check())
-        return 1
+    /; if (lolbert1(~lol, lol.stuff{1}) && lolbert1(~lol, lol.stuff{0}))  # with the values above, stuff{1} (1) equals lol.a but stuff{0} (2) does not
+        _delete(lol.stuff)  # release the buffer before the early return
+        return lol.a + lol.b
     ;/
-    
+    _delete(lol.stuff)  # release the buffer on the fall-through path
     return 0
-;/
-
+;/
\ No newline at end of file
diff --git a/tnslc/tokenizer.tnsl b/tnslc/tokenizer.tnsl
index ae52468..3de0182 100644
--- a/tnslc/tokenizer.tnsl
+++ b/tnslc/tokenizer.tnsl
@@ -55,7 +55,6 @@ struct Token {
     # Remove the last character from this token
     /; pop
         int ln = cstr_len(self.data)
-        self.data = _realloc(self.data, ln)
         self.data{ln - 1} = 0
     ;/
 
@@ -72,7 +71,7 @@ struct Token {
 
     # Delete the memory associated with this token
     /; _del
-        _delete(self.data)
+        _realloc(self.data, 0)  # NOTE(review): result is discarded and self.data is not reassigned; confirm a zero-size _realloc actually frees on the target runtime
     ;/
 
     # length of the string that this token encodes
@@ -106,13 +105,10 @@ struct Token {
 
 # Returns true if the token is a valid reserved token
 /; tok_reserved (Token tok) [bool]
-    log_one_nl('i')
     /; if (tok._len() == 1)
         return is_reserved(tok.data{0})
     ;; else if (tok._len() == 2)
-        bool a = contains_char(~MULTI_PARENS, tok.data{0})
-        bool b = contains_char(~MULTI_PARENS, tok.data{1})
-        return a && b
+        return contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1})
     ;/
 
     return in_csv(~CSV_AUGMENTS, tok.data)
@@ -125,25 +121,22 @@ struct Token {
 ;/
 
 /; get_tok_type(Token tok) [uint]
-    log_one_nl('h')
     /; if (tok_reserved(tok) == true)
         /; if (tok._len() > 1)
-            bool a = contains_char(~MULTI_PARENS, tok.data{0})
-            bool b = contains_char(~MULTI_PARENS, tok.data{1})
-            /; if (a && b)
+            /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
                 return TOKEN_TYPE.DELIMITER
             ;/
             return TOKEN_TYPE.AUGMENT
-        ;; else if (contains_char(~PARENS, tok.data{0}))
+        ;; else if (contains_char(~PARENS, tok.data{0}) == true)
             return TOKEN_TYPE.DELIMITER
-        ;; else if (contains_char(~SEPS, tok.data{0}))
+        ;; else if (contains_char(~SEPS, tok.data{0}) == true)
             return TOKEN_TYPE.SEPARATOR
-        ;; else if (contains_char(~AUGMENTS, tok.data{0}))
+        ;; else if (contains_char(~AUGMENTS, tok.data{0}) == true)
             return TOKEN_TYPE.AUGMENT
         ;/
-    ;; else if (in_csv(~CSV_KEYWORDS, tok.data))
+    ;; else if (in_csv(~CSV_KEYWORDS, tok.data) == true)
         return TOKEN_TYPE.KEYWORD
-    ;; else if (in_csv(~CSV_KEYTYPES, tok.data))
+    ;; else if (in_csv(~CSV_KEYTYPES, tok.data) == true)
         return TOKEN_TYPE.KEYTYPE
     ;; else if (tok_literal(tok) == true)
         return TOKEN_TYPE.LITERAL
@@ -153,27 +146,14 @@ struct Token {
 ;/
 
 
-/; break_token(Token tok, uint8 c) [bool]
-    log_one('g')
-    log_one(' ')
-    log_one_nl(c)
+/; break_token(~Token tok, uint8 c) [bool]  # tok is now taken by pointer, so append/pop below act on the caller's token
     # return true
-    uint type_before = get_tok_type(tok)
-    tok.append(c)
-    uint type_after = get_tok_type(tok)
-    tok.pop()
-    log_one('g')
-    bool a = type_before !== TOKEN_TYPE.DEFWORD && type_before != TOKEN_TYPE.KEYTYPE && type_before != TOKEN_TYPE.KEYWORD && type_after == TOKEN_TYPE.DEFWORD
-    log_one(' ')
-    log_one('[')
-    log_one(a)
-    log_one(']')
-    bool b = type_after !== TOKEN_TYPE.LITERAL && is_whitespace(c) == true
-    log_one(' ')
-    log_one('[')
-    log_one(b)
-    log_one_nl(']')
-    return a || b
+    uint type_before = get_tok_type(tok`)  # token type before c is added
+    tok`.append(c)  # temporarily extend the token with c
+    uint type_after = get_tok_type(tok`)  # token type with c included
+    tok`.pop()  # undo the temporary append
+    bool a = true  # NOTE(review): break decision is stubbed out; type_before/type_after are computed but never used
+    return a  # always true in this revision
 ;/
 
 /; tokenize_file (~void file_in, file_out)
@@ -191,7 +171,6 @@ struct Token {
             break
         ;/
 
-        log_one_nl('b')
         
         /; if (buf == '#')
             /; loop (_read_byte(file_in, ~buf, ~read_count))
@@ -200,32 +179,26 @@ struct Token {
                 ;/
             ;/
 
-        ;; else if (break_token(tmp, buf) == true)
-            log_one_nl('c')
+        ;; else if (break_token(~tmp, buf) == true)
             /; if (tmp._len() > 0)
-                log_one_nl('d')
                 print_token(tmp, file_out)
             ;/
             tmp._del()
             tmp.start()
             /; if (is_whitespace(buf) == false)
-                log_one_nl('e')
                 tmp.append(buf)
             ;; else if (buf == WHITESPACE{2})
-                log_one_nl('f')
                 tmp.append(WHITESPACE{2})
                 print_token(tmp, file_out)
                 tmp._del()
                 tmp.start()
             ;/
-            log_one_nl('c')
 
         ;; else
             tmp.append(buf)
 
         ;/
 
-        log_one_nl('b')
         read_count = 0
     ;/
 
@@ -235,3 +208,90 @@ struct Token {
 
     tmp._del()
 ;/
+
+{}uint8 w_SEP = "SEPARATOR\n\0"  # Labels printed by print_tok_type, one per TOKEN_TYPE value it handles; each ends in a newline and an explicit NUL
+{}uint8 w_DEL = "DELIMITER\n\0"
+{}uint8 w_AUG = "AUGMENT\n\0"
+{}uint8 w_KTP = "KEYTYPE\n\0"
+{}uint8 w_KWD = "KEYWORD\n\0"
+{}uint8 w_LIT = "LITERAL\n\0"
+{}uint8 w_DEF = "DEFWORD\n\0"
+
+/; print_tok_type(uint tt)
+    # Prints the label for token type tt (label text includes a trailing newline); any value not matched below prints the DEFWORD label.
+    ~uint8 ptr = ~w_DEF{0}
+
+    /; if (tt == TOKEN_TYPE.SEPARATOR)
+        ptr = ~w_SEP{0}
+    ;; else if (tt == TOKEN_TYPE.DELIMITER)
+        ptr = ~w_DEL{0}
+    ;; else if (tt == TOKEN_TYPE.AUGMENT)
+        ptr = ~w_AUG{0}
+    ;; else if (tt == TOKEN_TYPE.KEYTYPE)
+        ptr = ~w_KTP{0}
+    ;; else if (tt == TOKEN_TYPE.KEYWORD)
+        ptr = ~w_KWD{0}
+    ;; else if (tt == TOKEN_TYPE.LITERAL)
+        ptr = ~w_LIT{0}
+    ;; else if (tt == TOKEN_TYPE.DEFWORD)
+        ptr = ~w_DEF{0}  # same value ptr was initialised with
+    ;/
+
+    _printf(ptr)  # the label is the first argument to _printf; none of the w_* labels contain a '%'
+
+;/
+
+{}uint8 test_multi = "/;\0"  # Sample token strings fed to get_tok_type by tests(); each carries an explicit NUL terminator
+{}uint8 test_paren = "(\0"
+{}uint8 test_seps = ",\0"
+{}uint8 test_aug = ".\0"
+{}uint8 test_maug = "++\0"
+{}uint8 test_mkw = "if\0"
+{}uint8 test_mkt = "bool\0"
+{}uint8 test_def = "main\0"
+{}uint8 space = " \0"  # separator printed between a sample string and its type label
+
+/; tests  # For each sample string: print it, then a space, then the type label get_tok_type assigns to it
+    Token tk  # only tk.data is set here, and only to the global sample strings, so _del is never called on tk
+
+    # Delimiter
+    tk.data = ~test_multi{0}  # input "/;"
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_paren{0}  # input "("
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_seps{0}  # input ","
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_aug{0}  # input "."
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_maug{0}  # input "++"
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_mkw{0}  # input "if"
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_mkt{0}  # input "bool"
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+
+    tk.data = ~test_def{0}  # input "main"
+    _printf(tk.data)
+    _printf(~space{0})
+    print_tok_type(get_tok_type(tk))
+;/
-- 
cgit v1.2.3