Frame out port of compiler

author: Kyle Gunger <kgunger12@gmail.com> 2023-08-06 03:00:18 -0400
committer: Kyle Gunger <kgunger12@gmail.com> 2023-08-06 03:00:18 -0400
commit: d3dfc56318829e212a87da6874011d8304be31b0 (patch)
tree: 3e62dd637c5aa64f3ba51c3c08f35e1363efad97 /tnslc
parent: 8535ee80ebccb81cebd61d1f33992aaf682e455a (diff)
6 files changed, 317 insertions, 13 deletions
diff --git a/tnslc/compiler.tnsl b/tnslc/compiler.tnsl
new file mode 100644
index 0000000..ab43e6b
--- /dev/null
+++ b/tnslc/compiler.tnsl
@@ -0,0 +1,275 @@
+# Actual compilation of the vector of tokens, ported from the "dirty tnsl"
+# that was originally written for the interpreter
+
+# CompData represents three vectors:
+# hsec - the heading of the output assembly
+# dsec - the data tied to the assembly
+# csec - the .text section that is the code of the assembly
+struct CompData {
+    Vector
+        hsec,
+        dsec,
+        csec
+}
+
+# Lifecycle helpers for CompData.  Each one applies the same Vector
+# operation to all three sections (hsec, dsec, csec).
+/; method CompData
+    # Initialise every section by calling Vector.start with an element size of 1.
+    /; start
+        self.hsec.start(1)
+        self.dsec.start(1)
+        self.csec.start(1)
+    ;/
+
+    # Append c's sections onto the matching sections of this object
+    # (hsec onto hsec, dsec onto dsec, csec onto csec) using Vector.add.
+    # c itself is not released here.
+    /; add (CompData c)
+        self.hsec.add(c.hsec)
+        self.dsec.add(c.dsec)
+        self.csec.add(c.csec)
+    ;/
+
+    # Call _del on each of the three section vectors.
+    /; _del
+        self.hsec._del()
+        self.dsec._del()
+        self.csec._del()
+    ;/
+;/
+
+# Path represents the actual path of a file
+# that we are trying to tokenize
+# Assumes that the last item in the path array is a file name
+struct Path {
+    int
+        path_count,    # number of entries in split_path (each one is released in _del)
+    ~~uint8
+        split_path     # array of path_count separately allocated strings
+}
+
+/; method Path
+    # Set up the path from the C string `path`.
+    # At the moment this only allocates room for one 8-byte pointer plus a
+    # one-byte first entry and sets path_count to 1.
+    # TODO: the separator scan below has no body yet, so `path` is not split.
+    /; start (~uint8 path)
+        self.split_path = _alloc(8)
+        self.split_path{0} = _alloc(1)
+        self.path_count = 1
+
+        int i = 0
+        int j = cstr_len(path)
+
+        /; loop (i < j) [i++]
+            /; if (path{i} == '\\' || path{i} == '/')
+                # TODO: begin a new split_path entry at this separator
+            ;/
+        ;/
+    ;/
+
+    # Return a new Path started from this path's full string form.
+    /; copy [Path]
+        Path out
+        # full_path() hands back an allocated string (open_read and open_write
+        # _delete it after use), so release it once `out` has been started
+        # instead of leaking it.
+        ~uint8 pth = self.full_path()
+        out.start(pth)
+        _delete(pth)
+        return out
+    ;/
+
+    # TODO: not implemented yet; only declares an unused local.
+    /; sub_folder (~uint8 sub)
+        ~uint8 file_name
+    ;/
+
+    # TODO: not implemented yet; declared to return a Path but the body is empty.
+    /; relative_file(~uint8 rel_pth) [Path]
+
+    ;/
+
+    # Intended to return the whole path as one C string owned by the caller
+    # (open_read, open_write and copy all _delete the result).
+    # TODO: unfinished; the loop has no body and nothing is returned yet.
+    # NOTE(review): as written, `loop (pth)` tests a pointer that never changes.
+    /; full_path [~uint8]
+        ~uint8 pth = _alloc(1)
+
+        /; loop (pth)
+
+        ;/
+    ;/
+
+    # Open the file at this path with _open_file and return the handle.
+    # The temporary path string is deleted before returning.
+    /; open_read [~void]
+        ~uint8 path = self.full_path()
+        ~void out = _open_file(path)
+        _delete(path)
+        return out
+    ;/
+
+    # Create the file at this path with _create_file and return the handle.
+    # The temporary path string is deleted before returning.
+    /; open_write [~void]
+        ~uint8 path = self.full_path()
+        ~void out = _create_file(path)
+        _delete(path)
+        return out
+    ;/
+
+    # Delete each of the path_count entries of split_path, then the array itself.
+    /; _del
+        /; loop (int i = 0; i < self.path_count) [i++]
+            _delete(self.split_path{i})
+        ;/
+
+        _delete(self.split_path)
+    ;/
+;/
+
+
+#
+# Actual compiler code -- here be dragons
+#
+
+# Types of pointers the compiler may generate or work with
+# One of these values is stored in Type.ptr_chain for every pointer
+# augment on a type.
+enum PTR_TYPE [uint] {
+    POINTER = 0,
+    REFERENCE = 1,  # Type.size reports 8 when this appears in a ptr_chain
+    ARRAY = 2
+}
+
+# Represents a type of a variable
+struct Type {
+    int
+        s,          # size of type (bytes)
+        p_ct,       # ptr_chain count
+        m_ct,       # member count
+    ~uint8
+        name,       # name of the type
+    ~uint
+        ptr_chain,  # for every pointer augment on the type, give it an extra PTR_TYPE in the chain
+    ~Variable
+        members,    # member variables (their types and names)
+    ~Module
+        mod         # the methods (if any) that are associated with this type
+}
+
+/; method Type
+    # Initialiser placeholder; the body is still empty.
+    /; start
+
+    ;/
+
+    # Size of a value of this type.
+    # Yields 8 as soon as a REFERENCE entry is found among the first p_ct
+    # entries of ptr_chain, otherwise the stored size s.
+    /; size [int]
+        int idx = 0
+        /; loop (idx < self.p_ct) [idx++]
+            /; if (self.ptr_chain{idx} == PTR_TYPE.REFERENCE)
+                return 8
+            ;/
+        ;/
+        return self.s
+    ;/
+
+    # Release the four pointers held by the type: name, ptr_chain,
+    # members and mod.
+    /; _del
+        _delete(self.name)
+        _delete(self.ptr_chain)
+        _delete(self.members)
+        _delete(self.mod)
+    ;/
+;/
+
+# Build an empty placeholder Type: zero size, zero counts and null pointers.
+/; NO_TYPE [Type]
+    Type t
+    t.s = 0
+    # Zero the counts as well.  Type.size walks ptr_chain up to p_ct, and
+    # ptr_chain is null here, so p_ct must not keep whatever value the
+    # fresh struct happened to contain.
+    t.p_ct = 0
+    t.m_ct = 0
+    t.name = 0
+    t.ptr_chain = 0
+    t.members = 0
+    t.mod = 0
+    return t
+;/
+
+/; is_primitive (~uint8 name)
+    # TODO: not implemented yet; no return type is declared and the body is empty.
+;/
+
+# Location type represents the type of memory
+# that the variable is stored in
+enum LOCATION [uint] {
+    REGISTER = 0,  # Variable.location corresponds to a register name
+    STACK = 1,     # Variable.location is the offset from the base pointer
+    LABEL = 2,     # Variable.location is ignored
+    LITERAL = 3    # Variable.location holds the literal value itself
+}
+
+# The Big Kahuna
+struct Variable {
+    ~uint8
+        name,
+    Type
+        _type,
+    uint
+        location, # Actual location.  If literal, the literal value.
+                  # If register, corresponds to a register name.
+                  # If stack, represents the offset from the base pointer
+                  # Ignore if label.
+        loc_type  # presumably one of the LOCATION values -- TODO confirm
+}
+
+# I hate c-like strings.  Hopefully once I'm done with this
+# language I'll have something that doesn't use them
+
+
+# This part sucks ass
+/; method Variable
+    # TODO: no methods defined yet
+;/
+
+# Scopes
+# NOTE(review): the roles of num, c and tmp are not described anywhere in
+# this file -- document them once Scope has methods that use them.
+struct Scope {
+    int
+        num,
+        c,    # Figure it out yourself
+        tmp,
+    ~Scope
+        parent,  # presumably the enclosing scope -- TODO confirm
+    ~uint8
+        name
+}
+
+/; method Scope
+    # TODO: no methods defined yet
+;/
+
+struct Function {
+    ~uint8
+        name,
+    ~Type 
+        inputs,  # pointer to the input Type(s); NOTE(review): no input count is stored alongside it
+    Type
+        output,
+    ~Module mod
+}
+
+/; method Function
+    # TODO: no methods defined yet
+;/
+
+# NOTE(review): types, defs, functions and sub are plain pointers and the
+# struct stores no element counts for them.
+struct Module {
+    ~Module
+        parent,    # presumably the enclosing module -- TODO confirm
+    bool
+        exp,       # Export functions or not
+    ~uint8
+        name,
+    ~Type
+        types,     # Types defined in this module
+    ~Variable
+        defs,      # Variables defined in this module
+    ~Function
+        functions, # Functions defined in this module
+    ~Module
+        sub        # Sub modules
+}
+
+/; method Module
+    # TODO: no methods defined yet
+;/
+
+#
+# Actual compiler functions
+#
+
+# Used in the first pass
+/; get_type_P1
+    # TODO: not implemented yet; no parameters or return type are declared.
+;/
+
+/; get_type_P2
+    # TODO: not implemented yet; no parameters or return type are declared.
+;/
+
+/; get_artifact [~~uint8]
+    # TODO: not implemented yet; declared to return ~~uint8 but the body is empty.
+;/
+
+/; is_call [bool]
+    # TODO: not implemented yet; declared to return bool but the body is empty.
+;/
+
+/; is_definition
+    # TODO: not implemented yet; no parameters or return type are declared.
+;/
+\ No newline at end of file
diff --git a/tnslc/logging.tnsl b/tnslc/logging.tnsl
index 6fac3a0..238f3be 100644
--- a/tnslc/logging.tnsl
+++ b/tnslc/logging.tnsl
@@ -64,6 +64,10 @@ int log_mode = 1
     _printf(~_log_nl{0})
 ;/
 
+# Print i through _print_num with the _dec format, without the trailing
+# newline that log_num_nl adds.
+/; log_num (int i)
+    _print_num(~_dec{0}, i)
+;/
+
 /; log_num_nl (int i)
     _print_num(~_dec{0}, i)
     _printf(~_log_nl{0})
diff --git a/tnslc/tnslc_wrapped.tnsl b/tnslc/tnslc_wrapped.tnsl
index d58c045..cf5b59b 100644
--- a/tnslc/tnslc_wrapped.tnsl
+++ b/tnslc/tnslc_wrapped.tnsl
@@ -3,6 +3,7 @@
 :include "vector.tnsl"
 :include "utils.tnsl"
 :include "tokenizer.tnsl"
+# :include "compiler.tnsl"
 
 /; main (int argc, ~~uint8 argv) [int]
     asm "mov r8, rcx"
@@ -11,7 +12,10 @@
     ~void open_handle = _open_file(argv{1})
     ~void write_handle = _create_file(argv{2})
     log_one_nl('a')
-    tokenize_file(open_handle, write_handle)
+
+    Vector tokens = tokenize_file(open_handle, write_handle)
+    # TODO: do compilation
+
     log_one_nl('a')
     _close_file(open_handle)
     _close_file(write_handle)
diff --git a/tnslc/tokenizer.tnsl b/tnslc/tokenizer.tnsl
index 3220313..08c0b52 100644
--- a/tnslc/tokenizer.tnsl
+++ b/tnslc/tokenizer.tnsl
@@ -241,7 +241,13 @@ struct Token {
     return tmp
 ;/
 
-/; tokenize_file (~void file_in, file_out)
+{}uint8 w_tkn_gen = "%d Tokens generated from file.\n\0"
+
+/; tokenize_file (~void file_in, file_out) [Vector]
+    # This vector is going to store all of our tokens as we generate them
+    Vector out_vect
+    # The size of a token struct is 3 uint + pointer = 4*8 = 32 bytes
+    out_vect.start(32)
 
     Token tmp
     tmp.start()
@@ -271,16 +277,14 @@ struct Token {
             # Don't rope the last token into this
             /; if (tmp._len() > 0)
                 tmp._type = get_tok_type(tmp)
-                print_token(tmp, file_out)
-                tmp._del()
+                out_vect.push(~tmp)
                 tmp.start()
             ;/
 
             # Handle char/string literal
             tmp = handle_str(file_in, tmp, ~line, ~column, buf)
 
-            print_token(tmp, file_out)
-            tmp._del()
+            out_vect.push(~tmp)
             tmp.start()
             tmp.line = line
             tmp.column = column
@@ -290,10 +294,10 @@ struct Token {
             # Handle token break
             /; if (tmp._len() > 0)
                 tmp._type = get_tok_type(tmp)
-                print_token(tmp, file_out)
+                out_vect.push(~tmp)
+                tmp.start()
             ;/
-            tmp._del()
-            tmp.start()
+
             tmp.line = line
             tmp.column = column
             /; if (is_whitespace(buf) == false)
@@ -316,10 +320,13 @@ struct Token {
     ;/
 
     /; if (tmp._len() > 0)
-        print_token(tmp, file_out)
+        tmp._type = get_tok_type(tmp)
+        out_vect.push(~tmp)
     ;/
 
-    tmp._del()
+    _print_num(~w_tkn_gen{0}, out_vect._len())
+    
+    return out_vect
 ;/
 
 {}uint8 w_SEP = "SEPARATOR\0"
diff --git a/tnslc/utils.tnsl b/tnslc/utils.tnsl
index 9fe113a..54f01aa 100644
--- a/tnslc/utils.tnsl
+++ b/tnslc/utils.tnsl
@@ -71,6 +71,16 @@
     return false
 ;/
 
+# Report whether the byte c occurs among the first cstr_len(cstr)
+# bytes of the C string cstr.
+/; cstr_contains (~uint8 cstr, uint8 c) [bool]
+    int n_chars = cstr_len(cstr)
+    int pos = 0
+    /; loop (pos < n_chars) [pos++]
+        /; if (cstr{pos} == c)
+            return true
+        ;/
+    ;/
+    return false
+;/
+
 /; write_to_file(~void file, ~uint8 string)
     int ln = cstr_len(string)
     /; loop (int i = 0; i < ln) [i++]
diff --git a/tnslc/vector.tnsl b/tnslc/vector.tnsl
index d21f83c..87a49e9 100644
--- a/tnslc/vector.tnsl
+++ b/tnslc/vector.tnsl
@@ -10,9 +10,7 @@ struct Vector {
 
     /; resize (uint num_el)
         self.dat_size = num_el
-        _print_num(~_ptr{0}, ~self.dat)
         self.dat = _realloc(self.dat, num_el * self.el_size)
-        _print_num(~_ptr{0}, ~self.dat)
     ;/
 
     /; get (uint i) [~uint8]
@@ -66,6 +64,12 @@ struct Vector {
             self.set(i, v.dat + i * self.el_size)
         ;/
     ;/
+    
+    # Append every element of v to this vector, in index order, by pushing
+    # the pointer that v.get returns for each index.
+    /; add (Vector v)
+        int idx = 0
+        /; loop (idx < v._len()) [idx++]
+            self.push(v.get(idx))
+        ;/
+    ;/
 
     /; start (int el_size)
         self.num_el = 0
author	Kyle Gunger <kgunger12@gmail.com>	2023-08-06 03:00:18 -0400
committer	Kyle Gunger <kgunger12@gmail.com>	2023-08-06 03:00:18 -0400
commit	d3dfc56318829e212a87da6874011d8304be31b0 (patch)
tree	3e62dd637c5aa64f3ba51c3c08f35e1363efad97 /tnslc
parent	8535ee80ebccb81cebd61d1f33992aaf682e455a (diff)