summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kyle Gunger <kgunger12@gmail.com> 2023-08-06 03:00:18 -0400
committer Kyle Gunger <kgunger12@gmail.com> 2023-08-06 03:00:18 -0400
commit d3dfc56318829e212a87da6874011d8304be31b0 (patch)
tree 3e62dd637c5aa64f3ba51c3c08f35e1363efad97
parent 8535ee80ebccb81cebd61d1f33992aaf682e455a (diff)
Frame out port of compiler
-rw-r--r-- tnslc/compiler.tnsl 275
-rw-r--r-- tnslc/logging.tnsl 4
-rw-r--r-- tnslc/tnslc_wrapped.tnsl 6
-rw-r--r-- tnslc/tokenizer.tnsl 27
-rw-r--r-- tnslc/utils.tnsl 10
-rw-r--r-- tnslc/vector.tnsl 8
6 files changed, 317 insertions, 13 deletions
diff --git a/tnslc/compiler.tnsl b/tnslc/compiler.tnsl
new file mode 100644
index 0000000..ab43e6b
--- /dev/null
+++ b/tnslc/compiler.tnsl
@@ -0,0 +1,275 @@
+# Actual compilation of the vector of tokens, ported from the "dirty tnsl"
+# that was originally written for the interpreter
+
+# CompData represents three vectors:
+# hsec - the heading of the output assembly
+# dsec - the data tied to the assembly
+# csec - the .text section that is the code of the assembly
+struct CompData {
+ Vector
+ hsec, # heading of the output assembly
+ dsec, # data tied to the assembly
+ csec # the .text section (code) of the assembly
+}
+
+/; method CompData
+ # Starts all three section vectors with an element size of 1.
+ /; start
+ self.hsec.start(1)
+ self.dsec.start(1)
+ self.csec.start(1)
+ ;/
+
+ # Adds each section of c onto the matching section of self
+ # (heading to heading, data to data, code to code).
+ /; add (CompData c)
+ self.hsec.add(c.hsec)
+ self.dsec.add(c.dsec)
+ self.csec.add(c.csec)
+ ;/
+
+ # Calls _del on all three section vectors.
+ /; _del
+ self.hsec._del()
+ self.dsec._del()
+ self.csec._del()
+ ;/
+;/
+
+# Path represents the actual path of a file
+# that we are trying to tokenize
+# Assumes that the last item in the path array is a file name
+struct Path {
+ int
+ path_count, # number of entries in split_path (start sets it to 1, _del iterates up to it)
+ ~~uint8
+ split_path # array of separately allocated strings, one per path item
+}
+
+/; method Path
+    # Begins a Path from the c-string path.
+    # Allocates the item array with a single 1-byte item and sets
+    # path_count to 1.
+    # NOTE(review): the separator scan below has an empty body, so the
+    # input is not actually split yet -- TODO finish the port.
+    /; start (~uint8 path)
+        self.split_path = _alloc(8)
+        self.split_path{0} = _alloc(1)
+        self.path_count = 1
+
+        int i = 0
+        int j = cstr_len(path)
+
+        /; loop (i < j) [i++]
+            /; if (path{i} == '\\' || path{i} == '/')
+
+            ;/
+        ;/
+    ;/
+
+    # Returns a new Path started from this path's full path string.
+    /; copy [Path]
+        Path out
+        # The string handed back by full_path is released by its caller
+        # (open_read and open_write do the same), so free it here once
+        # start has consumed it.
+        ~uint8 pth = self.full_path()
+        out.start(pth)
+        _delete(pth)
+        return out
+    ;/
+
+    # Stub: only declares a local; nothing is done with sub yet.
+    /; sub_folder (~uint8 sub)
+        ~uint8 file_name
+    ;/
+
+    # Stub: declared to return a Path but the body is empty.
+    /; relative_file(~uint8 rel_pth) [Path]
+
+    ;/
+
+    # Unfinished: allocates one byte and then loops on pth with an empty
+    # body; nothing is returned although [~uint8] is declared.
+    # NOTE(review): pth is never changed inside the loop, so this would
+    # not terminate if _alloc returned non-zero -- TODO implement.
+    /; full_path [~uint8]
+        ~uint8 pth = _alloc(1)
+
+        /; loop (pth)
+
+        ;/
+    ;/
+
+    # Opens the file named by full_path() with _open_file and returns
+    # the handle; the temporary path string is deleted first.
+    /; open_read [~void]
+        ~uint8 path = self.full_path()
+        ~void out = _open_file(path)
+        _delete(path)
+        return out
+    ;/
+
+    # Creates the file named by full_path() with _create_file and
+    # returns the handle; the temporary path string is deleted first.
+    /; open_write [~void]
+        ~uint8 path = self.full_path()
+        ~void out = _create_file(path)
+        _delete(path)
+        return out
+    ;/
+
+    # Deletes every item in split_path, then the array itself.
+    /; _del
+        /; loop (int i = 0; i < self.path_count) [i++]
+            _delete(self.split_path{i})
+        ;/
+
+        _delete(self.split_path)
+    ;/
+;/
+
+
+#
+# Actual compiler code -- here be dragons
+#
+
+# Types of pointers the compiler may generate or work with
+enum PTR_TYPE [uint] {
+ POINTER = 0,
+ REFERENCE = 1, # Type.size returns 8 when this value appears in a ptr_chain
+ ARRAY = 2
+}
+
+# Represents a type of a variable
+struct Type {
+ int
+ s, # size of type (bytes); size() returns this unless ptr_chain holds a REFERENCE
+ p_ct, # ptr_chain count
+ m_ct, # member count
+ ~uint8
+ name, # name of the type
+ ~uint
+ ptr_chain, # for every pointer augment on the type, give it an extra PTR_TYPE in the chain
+ ~Variable
+ members, # member variables (their types and names)
+ ~Module
+ mod # the methods (if any) that are associated with this type
+}
+
+/; method Type
+    # Zeroes every field so that size does not walk ptr_chain with a
+    # garbage count and _del does not free garbage pointers.
+    # NOTE(review): confirm that _delete accepts 0 before relying on
+    # calling _del straight after start.
+    /; start
+        self.s = 0
+        self.p_ct = 0
+        self.m_ct = 0
+        self.name = 0
+        self.ptr_chain = 0
+        self.members = 0
+        self.mod = 0
+    ;/
+
+    # Returns 8 if any of the first p_ct entries of ptr_chain is
+    # PTR_TYPE.REFERENCE; otherwise returns the stored size s.
+    /; size [int]
+        /; loop (int i = 0; i < self.p_ct) [i++]
+            /; if (self.ptr_chain{i} == PTR_TYPE.REFERENCE)
+                return 8
+            ;/
+        ;/
+        return self.s
+    ;/
+
+    # Deletes the name, ptr_chain, members and mod pointers.
+    # The individual members are not visited before the array is deleted.
+    /; _del
+        _delete(self.name)
+        _delete(self.ptr_chain)
+        _delete(self.members)
+        _delete(self.mod)
+    ;/
+;/
+
+# Builds an empty Type: zero size, zero counts and zeroed pointers.
+/; NO_TYPE [Type]
+    Type t
+    t.s = 0
+    # The counts must be zeroed as well: Type.size loops up to p_ct, so
+    # leaving it unset would index the zeroed ptr_chain with a garbage bound.
+    t.p_ct = 0
+    t.m_ct = 0
+    t.name = 0
+    t.ptr_chain = 0
+    t.members = 0
+    t.mod = 0
+    return t
+;/
+
+# Stub: takes a name string; the body is empty and no return type is declared yet.
+/; is_primitive (~uint8 name)
+
+;/
+
+# Location type represents the type of memory
+# that the variable is stored in
+enum LOCATION [uint] {
+ REGISTER = 0, # Variable.location corresponds to a register name
+ STACK = 1, # Variable.location is the offset from the base pointer
+ LABEL = 2, # Variable.location is ignored
+ LITERAL = 3 # Variable.location holds the literal value
+}
+
+# The Big Kahuna
+struct Variable {
+ ~uint8
+ name, # name of the variable
+ Type
+ _type, # type of the variable
+ uint
+ location, # Actual location. If literal, the literal value.
+ # If register, corresponds to a register name.
+ # If stack, represents the offset from the base pointer
+ # Ignore if label.
+ loc_type # presumably a LOCATION value saying how to read location -- TODO confirm
+}
+
+# I hate c-like strings. Hopefully once I'm done with this
+# language I'll have something that doesn't use them
+
+
+# This part sucks ass
+# No methods are defined for Variable yet; the block is an empty placeholder.
+/; method Variable
+
+;/
+
+# Scopes
+struct Scope {
+ int
+ num,
+ c, # NOTE(review): purpose is not documented anywhere in this file -- TODO describe
+ tmp,
+ ~Scope
+ parent, # presumably the enclosing scope -- TODO confirm
+ ~uint8
+ name
+}
+
+# No methods are defined for Scope yet; the block is an empty placeholder.
+/; method Scope
+
+;/
+
+struct Function {
+ ~uint8
+ name, # name of the function
+ ~Type
+ inputs, # pointer to the input types; no count is stored in this struct
+ Type
+ output, # the single output type
+ ~Module mod # presumably the module this function belongs to -- TODO confirm
+}
+
+# No methods are defined for Function yet; the block is an empty placeholder.
+/; method Function
+
+;/
+
+struct Module {
+ ~Module
+ parent, # presumably the module that contains this one -- TODO confirm
+ bool
+ exp, # Export functions or not
+ ~uint8
+ name, # name of the module
+ ~Type
+ types, # Types defined in this module
+ ~Variable
+ defs, # Variables defined in this module
+ ~Function
+ functions, # Functions defined in this module
+ ~Module
+ sub # Sub modules
+}
+
+# No methods are defined for Module yet; the block is an empty placeholder.
+/; method Module
+
+;/
+
+#
+# Actual compiler functions
+#
+
+# Used in the first pass
+# Stub: declared without parameters or a return type; the body is empty.
+/; get_type_P1
+
+;/
+
+# Stub: declared without parameters or a return type; the body is empty.
+/; get_type_P2
+
+;/
+
+# Stub: declared to return ~~uint8 but the body is empty, so nothing is returned yet.
+/; get_artifact [~~uint8]
+
+;/
+
+# Stub: declared to return bool but the body is empty, so nothing is returned yet.
+/; is_call [bool]
+
+;/
+
+# Stub: declared without parameters or a return type; the body is empty.
+/; is_definition
+
+;/ \ No newline at end of file
diff --git a/tnslc/logging.tnsl b/tnslc/logging.tnsl
index 6fac3a0..238f3be 100644
--- a/tnslc/logging.tnsl
+++ b/tnslc/logging.tnsl
@@ -64,6 +64,10 @@ int log_mode = 1
_printf(~_log_nl{0})
;/
+# Prints i through _print_num using the _dec string.
+# Unlike log_num_nl, no _log_nl output follows the number.
+/; log_num (int i)
+ _print_num(~_dec{0}, i)
+;/
+
/; log_num_nl (int i)
_print_num(~_dec{0}, i)
_printf(~_log_nl{0})
diff --git a/tnslc/tnslc_wrapped.tnsl b/tnslc/tnslc_wrapped.tnsl
index d58c045..cf5b59b 100644
--- a/tnslc/tnslc_wrapped.tnsl
+++ b/tnslc/tnslc_wrapped.tnsl
@@ -3,6 +3,7 @@
:include "vector.tnsl"
:include "utils.tnsl"
:include "tokenizer.tnsl"
+# :include "compiler.tnsl"
/; main (int argc, ~~uint8 argv) [int]
asm "mov r8, rcx"
@@ -11,7 +12,10 @@
~void open_handle = _open_file(argv{1})
~void write_handle = _create_file(argv{2})
log_one_nl('a')
- tokenize_file(open_handle, write_handle)
+
+ Vector tokens = tokenize_file(open_handle, write_handle)
+ # TODO: do compilation
+
log_one_nl('a')
_close_file(open_handle)
_close_file(write_handle)
diff --git a/tnslc/tokenizer.tnsl b/tnslc/tokenizer.tnsl
index 3220313..08c0b52 100644
--- a/tnslc/tokenizer.tnsl
+++ b/tnslc/tokenizer.tnsl
@@ -241,7 +241,13 @@ struct Token {
return tmp
;/
-/; tokenize_file (~void file_in, file_out)
+{}uint8 w_tkn_gen = "%d Tokens generated from file.\n\0"
+
+/; tokenize_file (~void file_in, file_out) [Vector]
+ # This vector is going to store all of our tokens as we generate them
+ Vector out_vect
+ # The size of a token struct is 3 uint + pointer = 4*8 = 32 bytes
+ out_vect.start(32)
Token tmp
tmp.start()
@@ -271,16 +277,14 @@ struct Token {
# Don't rope the last token into this
/; if (tmp._len() > 0)
tmp._type = get_tok_type(tmp)
- print_token(tmp, file_out)
- tmp._del()
+ out_vect.push(~tmp)
tmp.start()
;/
# Handle char/string literal
tmp = handle_str(file_in, tmp, ~line, ~column, buf)
- print_token(tmp, file_out)
- tmp._del()
+ out_vect.push(~tmp)
tmp.start()
tmp.line = line
tmp.column = column
@@ -290,10 +294,10 @@ struct Token {
# Handle token break
/; if (tmp._len() > 0)
tmp._type = get_tok_type(tmp)
- print_token(tmp, file_out)
+ out_vect.push(~tmp)
+ tmp.start()
;/
- tmp._del()
- tmp.start()
+
tmp.line = line
tmp.column = column
/; if (is_whitespace(buf) == false)
@@ -316,10 +320,13 @@ struct Token {
;/
/; if (tmp._len() > 0)
- print_token(tmp, file_out)
+ tmp._type = get_tok_type(tmp)
+ out_vect.push(~tmp)
;/
- tmp._del()
+ _print_num(~w_tkn_gen{0}, out_vect._len())
+
+ return out_vect
;/
{}uint8 w_SEP = "SEPARATOR\0"
diff --git a/tnslc/utils.tnsl b/tnslc/utils.tnsl
index 9fe113a..54f01aa 100644
--- a/tnslc/utils.tnsl
+++ b/tnslc/utils.tnsl
@@ -71,6 +71,16 @@
return false
;/
+# Reports whether the byte c occurs in the c-string cstr.
+# Only the first cstr_len(cstr) bytes are examined.
+/; cstr_contains (~uint8 cstr, uint8 c) [bool]
+    int total = cstr_len(cstr)
+    int k = 0
+    /; loop (k < total) [k++]
+        /; if (cstr{k} == c)
+            return true
+        ;/
+    ;/
+    return false
+;/
+
/; write_to_file(~void file, ~uint8 string)
int ln = cstr_len(string)
/; loop (int i = 0; i < ln) [i++]
diff --git a/tnslc/vector.tnsl b/tnslc/vector.tnsl
index d21f83c..87a49e9 100644
--- a/tnslc/vector.tnsl
+++ b/tnslc/vector.tnsl
@@ -10,9 +10,7 @@ struct Vector {
/; resize (uint num_el)
self.dat_size = num_el
- _print_num(~_ptr{0}, ~self.dat)
self.dat = _realloc(self.dat, num_el * self.el_size)
- _print_num(~_ptr{0}, ~self.dat)
;/
/; get (uint i) [~uint8]
@@ -66,6 +64,12 @@ struct Vector {
self.set(i, v.dat + i * self.el_size)
;/
;/
+
+ # Pushes onto this vector the pointer that v.get returns for every
+ # index below v._len(), in ascending order.
+ /; add (Vector v)
+     int k = 0
+     /; loop (k < v._len()) [k++]
+         self.push(v.get(k))
+     ;/
+ ;/
/; start (int el_size)
self.num_el = 0