From d3dfc56318829e212a87da6874011d8304be31b0 Mon Sep 17 00:00:00 2001 From: Kyle Gunger Date: Sun, 6 Aug 2023 03:00:18 -0400 Subject: Frame out port of compiler --- tnslc/compiler.tnsl | 275 +++++++++++++++++++++++++++++++++++++++++++++++ tnslc/logging.tnsl | 4 + tnslc/tnslc_wrapped.tnsl | 6 +- tnslc/tokenizer.tnsl | 27 +++-- tnslc/utils.tnsl | 10 ++ tnslc/vector.tnsl | 8 +- 6 files changed, 317 insertions(+), 13 deletions(-) create mode 100644 tnslc/compiler.tnsl diff --git a/tnslc/compiler.tnsl b/tnslc/compiler.tnsl new file mode 100644 index 0000000..ab43e6b --- /dev/null +++ b/tnslc/compiler.tnsl @@ -0,0 +1,275 @@ +# Actual compilation of the vector of tokens, ported from the "dirty tnsl" +# that was originally written for the interpreter + +# CompData represents three vectors: +# hsec - the heading of the output assembly +# dsec - the data tied to the assembly +# csec - the .text section that is the code of the assembly +struct CompData { + Vector + hsec, + dsec, + csec +} + +/; method CompData + /; start + self.hsec.start(1) + self.dsec.start(1) + self.csec.start(1) + ;/ + + /; add (CompData c) + self.hsec.add(c.hsec) + self.dsec.add(c.dsec) + self.csec.add(c.csec) + ;/ + + /; _del + self.hsec._del() + self.dsec._del() + self.csec._del() + ;/ +;/ + +# Path represents the actual path of a file +# that we are trying to tokenize +# Assumes that the last item in the path array is a file name +struct Path { + int + path_count, + ~~uint8 + split_path +} + +/; method Path + /; start (~uint8 path) + self.split_path = _alloc(8) + self.split_path{0} = _alloc(1) + self.path_count = 1 + + int i = 0 + int j = cstr_len(path) + + /; loop (i < j) [i++] + /; if (path{i} == '\\' || path{i} == '/') + + ;/ + ;/ + ;/ + + /; copy [Path] + Path out + out.start(self.full_path()) + ;/ + + /; sub_folder (~uint8 sub) + ~uint8 file_name + ;/ + + /; relative_file(~uint8 rel_pth) [Path] + + ;/ + + /; full_path [~uint8] + ~uint8 pth = _alloc(1) + + /; loop (pth) + + ;/ + ;/ + + /; 
open_read [~void] + ~uint8 path = self.full_path() + ~void out = _open_file(path) + _delete(path) + return out + ;/ + + /; open_write [~void] + ~uint8 path = self.full_path() + ~void out = _create_file(path) + _delete(path) + return out + ;/ + + /; _del + /; loop (int i = 0; i < self.path_count) [i++] + _delete(self.split_path{i}) + ;/ + + _delete(self.split_path) + ;/ +;/ + + +# +# Actual compiler code -- here be dragons +# + +# Types of pointers the compiler may generate or work with +enum PTR_TYPE [uint] { + POINTER = 0, + REFERENCE = 1, + ARRAY = 2 +} + +# Represents a type of a variable +struct Type { + int + s, # size of type (bytes) + p_ct, # ptr_chain count + m_ct, # member count + ~uint8 + name, # name of the type + ~uint + ptr_chain, # for every pointer augment on the type, give it an extra PTR_TYPE in the chain + ~Variable + members, # member variables (their types and names) + ~Module + mod # the methods (if any) that are associated with this type +} + +/; method Type + /; start + + ;/ + + /; size [int] + /; loop (int i = 0; i < self.p_ct) [i++] + /; if (self.ptr_chain{i} == PTR_TYPE.REFERENCE) + return 8 + ;/ + ;/ + return self.s + ;/ + + /; _del + _delete(self.name) + _delete(self.ptr_chain) + _delete(self.members) + _delete(self.mod) + ;/ +;/ + +/; NO_TYPE [Type] + Type t + t.s = 0 + t.name = 0 + t.ptr_chain = 0 + t.members = 0 + t.mod = 0 + return t +;/ + +/; is_primitive (~uint8 name) + +;/ + +# Location type represents the type of memory +# that the variable is stored in +enum LOCATION [uint] { + REGISTER = 0, + STACK = 1, + LABEL = 2, + LITERAL = 3 +} + +# The Big Kahuna +struct Variable { + ~uint8 + name, + Type + _type, + uint + location, # Actual location. If literal, the literal value. + # If register, corresponds to a register name. + # If stack, represents the offset from the base pointer + # Ignore if label. + loc_type +} + +# I hate c-like strings.
Hopefully once I'm done with this +# language I'll have something that doesn't use them + + +# This part sucks ass +/; method Variable + +;/ + +# Scopes +struct Scope { + int + num, + c, # Figure it out yourself + tmp, + ~Scope + parent, + ~uint8 + name +} + +/; method Scope + +;/ + +struct Function { + ~uint8 + name, + ~Type + inputs, + Type + output, + ~Module mod +} + +/; method Function + +;/ + +struct Module { + ~Module + parent, + bool + exp, # Export functions or not + ~uint8 + name, + ~Type + types, # Types defined in this module + ~Variable + defs, # Variables defined in this module + ~Function + functions, # Functions defined in this module + ~Module + sub # Sub modules +} + +/; method Module + +;/ + +# +# Actual compiler functions +# + +# Used in the first pass +/; get_type_P1 + +;/ + +/; get_type_P2 + +;/ + +/; get_artifact [~~uint8] + +;/ + +/; is_call [bool] + +;/ + +/; is_definition + +;/ \ No newline at end of file diff --git a/tnslc/logging.tnsl b/tnslc/logging.tnsl index 6fac3a0..238f3be 100644 --- a/tnslc/logging.tnsl +++ b/tnslc/logging.tnsl @@ -64,6 +64,10 @@ int log_mode = 1 _printf(~_log_nl{0}) ;/ +/; log_num (int i) + _print_num(~_dec{0}, i) +;/ + /; log_num_nl (int i) _print_num(~_dec{0}, i) _printf(~_log_nl{0}) diff --git a/tnslc/tnslc_wrapped.tnsl b/tnslc/tnslc_wrapped.tnsl index d58c045..cf5b59b 100644 --- a/tnslc/tnslc_wrapped.tnsl +++ b/tnslc/tnslc_wrapped.tnsl @@ -3,6 +3,7 @@ :include "vector.tnsl" :include "utils.tnsl" :include "tokenizer.tnsl" +# :include "compiler.tnsl" /; main (int argc, ~~uint8 argv) [int] asm "mov r8, rcx" @@ -11,7 +12,10 @@ ~void open_handle = _open_file(argv{1}) ~void write_handle = _create_file(argv{2}) log_one_nl('a') - tokenize_file(open_handle, write_handle) + + Vector tokens = tokenize_file(open_handle, write_handle) + # TODO: do compilation + log_one_nl('a') _close_file(open_handle) _close_file(write_handle) diff --git a/tnslc/tokenizer.tnsl b/tnslc/tokenizer.tnsl index 3220313..08c0b52 100644 --- 
a/tnslc/tokenizer.tnsl +++ b/tnslc/tokenizer.tnsl @@ -241,7 +241,13 @@ struct Token { return tmp ;/ -/; tokenize_file (~void file_in, file_out) +{}uint8 w_tkn_gen = "%d Tokens generated from file.\n\0" + +/; tokenize_file (~void file_in, file_out) [Vector] + # This vector is going to store all of our tokens as we generate them + Vector out_vect + # The size of a token struct is 3 uint + pointer = 4*8 = 32 bytes + out_vect.start(32) Token tmp tmp.start() @@ -271,16 +277,14 @@ struct Token { # Don't rope the last token into this /; if (tmp._len() > 0) tmp._type = get_tok_type(tmp) - print_token(tmp, file_out) - tmp._del() + out_vect.push(~tmp) tmp.start() ;/ # Handle char/string literal tmp = handle_str(file_in, tmp, ~line, ~column, buf) - print_token(tmp, file_out) - tmp._del() + out_vect.push(~tmp) tmp.start() tmp.line = line tmp.column = column @@ -290,10 +294,10 @@ struct Token { # Handle token break /; if (tmp._len() > 0) tmp._type = get_tok_type(tmp) - print_token(tmp, file_out) + out_vect.push(~tmp) + tmp.start() ;/ - tmp._del() - tmp.start() + tmp.line = line tmp.column = column /; if (is_whitespace(buf) == false) @@ -316,10 +320,13 @@ struct Token { ;/ /; if (tmp._len() > 0) - print_token(tmp, file_out) + tmp._type = get_tok_type(tmp) + out_vect.push(~tmp) ;/ - tmp._del() + _print_num(~w_tkn_gen{0}, out_vect._len()) + + return out_vect ;/ {}uint8 w_SEP = "SEPARATOR\0" diff --git a/tnslc/utils.tnsl b/tnslc/utils.tnsl index 9fe113a..54f01aa 100644 --- a/tnslc/utils.tnsl +++ b/tnslc/utils.tnsl @@ -71,6 +71,16 @@ return false ;/ +/; cstr_contains (~uint8 cstr, uint8 c) [bool] + int j = cstr_len(cstr) + /; loop (int i = 0; i < j) [i++] + /; if (cstr{i} == c) + return true + ;/ + ;/ + return false +;/ + /; write_to_file(~void file, ~uint8 string) int ln = cstr_len(string) /; loop (int i = 0; i < ln) [i++] diff --git a/tnslc/vector.tnsl b/tnslc/vector.tnsl index d21f83c..87a49e9 100644 --- a/tnslc/vector.tnsl +++ b/tnslc/vector.tnsl @@ -10,9 +10,7 @@ struct Vector 
{ /; resize (uint num_el) self.dat_size = num_el - _print_num(~_ptr{0}, ~self.dat) self.dat = _realloc(self.dat, num_el * self.el_size) - _print_num(~_ptr{0}, ~self.dat) ;/ /; get (uint i) [~uint8] @@ -66,6 +64,12 @@ struct Vector { self.set(i, v.dat + i * self.el_size) ;/ ;/ + + /; add (Vector v) + /; loop (int i = 0; i < v._len()) [i++] + self.push(v.get(i)) + ;/ + ;/ /; start (int el_size) self.num_el = 0 -- cgit v1.2.3