# tnslc/tokenizer.tnsl
#
# Provenance: this is the text of the single deletion hunk (@@ -1,364 +0,0 @@,
# index 7a15ade..0000000) of commit f3a773c750181f8f3d52fd672587814275a04bd7,
# "Remove old parser files", Kyle Gunger, Mon, 25 Mar 2024 22:39:06 -0400,
# as rendered by cgit v1.2.3.  The leading '-' diff markers have been dropped
# and line structure restored; the fixes made on top of that are called out
# in the comments of the affected functions.

# All single reserved characters
{}uint8 MULTI_PARENS = "/;:#"
{}uint8 PARENS = "()[]{}"
{}uint8 SEPS = "\n;:,"
{}uint8 RESERVED = "`~!%^&*()-+=[]{}|;:/?<>.,"
{}uint8 AUGMENTS = "=~!<>&|^+-*/%`."

{}uint8 WHITESPACE = " \r\n\t"

# All lists of keywords are comma delim because the compiler does not yet support arrays of strings
{}uint8 CSV_AUGMENTS = "++,--,==,!==,&&,||,^^,<==,>==,!>,!<,~=,`=,%=,^=,&=,*=,!=,|=,/=,<<,>>,!&,!|,!^,len,is"

{}uint8 CSV_KEYWORDS = "if,else,loop,continue,break,return,method,struct,enum,interface,export,module,const,static,volatile,raw,extends,override,asm"

{}uint8 CSV_KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,comp32,comp64,comp,vect,bool,type,void"

# Types of tokens
enum TOKEN_TYPE [uint] {
    SEPARATOR = 0,
    DELIMITER = 1,
    AUGMENT = 2,
    KEYTYPE = 3,
    KEYWORD = 4,
    LITERAL = 5,
    DEFWORD = 6
}

# Token structure represents a single token in the program
#   _type  - one of the TOKEN_TYPE values
#   line   - line recorded for the token by tokenize_file
#   column - column recorded for the token by tokenize_file
#   data   - zero-terminated text of the token
struct Token {
    uint
        _type,
        line,
        column,
    ~uint8
        data
}

# Shortcut methods on the token struct
/; method Token

    # Initialize the data buffer to an empty (zero-terminated) string.
    # Any buffer previously referenced by self.data is NOT released here.
    /; start
        self.data = _alloc(1)
        self.data{0} = 0
    ;/

    # Append a character to the end of the token.
    # The buffer grows by one byte and stays zero-terminated.
    /; append (uint8 ch)
        int ln = cstr_len(self.data)
        self.data = _realloc(self.data, ln + 2)
        self.data{ln} = ch
        self.data{ln + 1} = 0
    ;/

    # Remove the last character from this token.
    # FIX: popping an empty token used to write to data{-1}; it is now a no-op.
    /; pop
        int ln = cstr_len(self.data)
        /; if (ln > 0)
            self.data{ln - 1} = 0
        ;/
    ;/

    # Copy another token to this token.
    # A fresh buffer is allocated for the text, so the two tokens do not
    # share storage afterwards.  The buffer self.data pointed at before the
    # call is not released here.
    /; copy (Token other)
        self._type = other._type
        self.line = other.line
        self.column = other.column

        self.data = _alloc(cstr_len(other.data) + 1)

        cstr_copy(other.data, self.data)
    ;/

    # Delete the memory associated with this token
    # NOTE(review): relies on _realloc(ptr, 0) releasing the buffer - confirm
    # against the runtime's _realloc.
    /; _del
        _realloc(self.data, 0)
    ;/

    # length of the string that this token encodes
    /; _len [int]
        return cstr_len(self.data)
    ;/
;/

{}uint8 tkn_st = "{ \0", tkn_nd = " }\n\0", tkn_sp = " \0"

# Write a token to file_out as "{ <TYPE> <text> }" followed by a newline.
# FIX: print_tok_type takes the uint type code, so tok._type is passed now;
# the original passed the whole Token struct.
/; print_token (Token tok, ~void file_out)
    write_to_file(file_out, ~tkn_st{0})

    write_to_file(file_out, print_tok_type(tok._type))
    write_to_file(file_out, ~tkn_sp{0})
    write_to_file(file_out, tok.data)

    write_to_file(file_out, ~tkn_nd{0})
;/

# Returns true if the character is whitespace
/; is_whitespace(uint8 c) [bool]
    return contains_char(~WHITESPACE, c)
;/


# Returns true if the character is reserved
/; is_reserved (uint8 c) [bool]
    return contains_char(~RESERVED, c)
;/

# Returns true if the token is a valid reserved token:
#   - a single character from RESERVED,
#   - two characters that are both in MULTI_PARENS, or
#   - any entry of CSV_AUGMENTS.
# NOTE(review): in_csv is assumed to return a negative value when nothing
# matches ("!< 0" reads "not less than zero") - confirm against in_csv.
/; tok_reserved (Token tok) [bool]
    /; if (tok._len() == 1)
        return is_reserved(tok.data{0})
    ;; else if (tok._len() == 2)
        /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
            return true
        ;/
    ;/

    return in_csv(~CSV_AUGMENTS, tok.data) !< 0
;/

# True if the token is a valid number (integer or float).
# The text must start with a digit and consist of digits with at most one '.'.
/; is_numeric_literal(Token tok) [bool]
    /; if (tok._len() < 1)
        return false
    ;; else if (tok.data{0} < '0' || tok.data{0} > '9')
        return false
    ;/

    # Detects a leading "0<non-digit>" prefix.  The flag is computed but not
    # consulted yet, so such text is still rejected by the digit loop below.
    bool non_dec = false
    /; if (tok._len() > 1 && tok.data{0} == '0')
        non_dec = tok.data{1} > '9'
        # TODO: non_dec not impl
    ;/

    bool dec_seen = false

    /; loop (int i = 0; i < tok._len()) [i++]
        /; if (dec_seen == false && tok.data{i} == '.')
            # First decimal point is allowed; a second one falls through to
            # the digit check below and fails it.
            dec_seen = true
        ;; else if (tok.data{i} < '0' || tok.data{i} > '9')
            return false
        ;/
    ;/

    return true
;/

# Classify a token's text and return the matching TOKEN_TYPE value.
# Reserved text is checked first, then keywords, key types and numeric
# literals; everything else is a DEFWORD.  A single reserved character that
# is in none of PARENS, SEPS or AUGMENTS also ends up as DEFWORD.
/; get_tok_type(Token tok) [uint]
    /; if (tok_reserved(tok) == true)
        /; if (tok._len() > 1)
            /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
                return TOKEN_TYPE.DELIMITER
            ;/
            return TOKEN_TYPE.AUGMENT
        ;; else if (contains_char(~PARENS, tok.data{0}) == true)
            return TOKEN_TYPE.DELIMITER
        ;; else if (contains_char(~SEPS, tok.data{0}) == true)
            return TOKEN_TYPE.SEPARATOR
        ;; else if (contains_char(~AUGMENTS, tok.data{0}) == true)
            return TOKEN_TYPE.AUGMENT
        ;/
    ;; else if (in_csv(~CSV_KEYWORDS, tok.data) !< 0)
        return TOKEN_TYPE.KEYWORD
    ;; else if (in_csv(~CSV_KEYTYPES, tok.data) !< 0)
        return TOKEN_TYPE.KEYTYPE
    ;; else if (is_numeric_literal(tok) == true)
        return TOKEN_TYPE.LITERAL
    ;/

    return TOKEN_TYPE.DEFWORD
;/


# Decide whether character c must start a new token instead of extending tok.
# The token is classified as it stands and again with c tentatively appended
# (the append is undone before returning).  A break is reported when:
#   - c is whitespace and the extended text is not a literal,
#   - c is reserved and the extended text is only a DEFWORD, or c is not
#     reserved but the current token starts with a reserved character,
#   - the token was a literal and would stop being anything but a DEFWORD.
# FIX: the third flag used to be declared as "bool c", redeclaring the
# parameter c; the locals now have distinct, descriptive names.
/; break_token(~Token tok, uint8 c) [bool]
    uint type_before = get_tok_type(tok`)
    tok`.append(c)
    uint type_after = get_tok_type(tok`)
    tok`.pop()

    bool ws_break = is_whitespace(c) && type_after !== TOKEN_TYPE.LITERAL

    bool reserved_break = false
    /; if (is_reserved(c) == true)
        reserved_break = type_after == TOKEN_TYPE.DEFWORD
    ;; else if (tok`._len() > 0)
        reserved_break = is_reserved(tok`.data{0})
    ;/

    bool literal_break = type_before == TOKEN_TYPE.LITERAL && type_after == TOKEN_TYPE.DEFWORD

    return ws_break || reserved_break || literal_break
;/

# Skip the rest of a '#' comment: consume bytes from file_in up to and
# including the next '\n', or until a read returns 0 bytes.
/; handle_comment (~void file_in)
    uint8 buf = 0
    int read_count = 0
    /; loop
        read_count = _read_byte(file_in, ~buf)
        /; if (buf == '\n' || read_count == 0)
            break
        ;/
        read_count = 0
    ;/
;/

# Read a quoted char/string literal into tmp and return it.
#   file_in - input to read from; the opening quote has already been consumed
#   tmp     - token to fill; its type is set to LITERAL
#   line, column - caller's position counters, advanced as bytes are consumed
#   first   - the opening quote character; the literal ends at the next
#             unescaped occurrence of it, or when a read returns 0 bytes
# A backslash and the byte following it are both copied verbatim, so an
# escaped quote does not terminate the literal.
/; handle_str (~void file_in, Token tmp, ~int line, column, uint8 first) [Token]
    uint8 buf = first
    int read_count = 0
    tmp._type = TOKEN_TYPE.LITERAL
    tmp.append(buf)
    read_count = 0

    /; loop
        read_count = _read_byte(file_in, ~buf)

        /; if (read_count == 0)
            break
        ;/

        /; if (buf == '\\')
            tmp.append(buf)
            read_count = _read_byte(file_in, ~buf)
            column`++
            /; if (read_count !== 0)
                tmp.append(buf)
            ;/
        ;; else if (buf == first)
            tmp.append(buf)
            break
        ;; else
            tmp.append(buf)
        ;/

        /; if (buf == '\n')
            line`++
            column` = 1
        ;; else
            column`++
        ;/

        read_count = 0
    ;/

    return tmp
;/

{}uint8 w_tkn_gen = "%d Tokens generated from file.\n\0"

# Read file_in byte by byte and return a Vector of Token structs.
#
# Changes relative to the removed original:
#   - A '#' comment now flushes the pending token first.  handle_comment
#     swallows the terminating newline, so previously text before the comment
#     was glued to the first word of the following line.
#   - column is reset after a comment, the same way it is after a newline.
#   - A token's line/column are recorded when its first character is stored
#     (or, for literals, at the opening quote) instead of at the preceding
#     break character.
/; tokenize_file (~void file_in) [Vector]
    # This vector is going to store all of our tokens as we generate them
    Vector out_vect
    # The size of a token struct is 3 uint + pointer = 4*8 = 32 bytes
    out_vect.start(32)

    Token tmp
    tmp.start()
    tmp.line = 1
    tmp.column = 1

    uint8 buf = 0
    int read_count = 0
    int line = 1
    int column = 1

    # Read loop.
    /; loop [column++]
        read_count = _read_byte(file_in, ~buf)
        /; if (read_count == 0)
            break
        ;/

        /; if (buf == '#')

            # A comment ends whatever token was being built
            /; if (tmp._len() > 0)
                tmp._type = get_tok_type(tmp)
                out_vect.push(~tmp)
                tmp.start()
            ;/

            # Handle comment
            handle_comment(file_in)
            line++
            # The loop step bumps this back to 1 for the next line's first byte
            column = 0

        ;; else if (buf == '\'' || buf == '"')

            # Don't rope the last token into this
            /; if (tmp._len() > 0)
                tmp._type = get_tok_type(tmp)
                out_vect.push(~tmp)
                tmp.start()
            ;/

            # The literal starts at the opening quote
            tmp.line = line
            tmp.column = column

            # Handle char/string literal
            tmp = handle_str(file_in, tmp, ~line, ~column, buf)

            out_vect.push(~tmp)
            tmp.start()
            tmp.line = line
            tmp.column = column

        ;; else if (break_token(~tmp, buf) == true)

            # Handle token break
            /; if (tmp._len() > 0)
                tmp._type = get_tok_type(tmp)
                out_vect.push(~tmp)
                tmp.start()
            ;/

            tmp.line = line
            tmp.column = column
            /; if (is_whitespace(buf) == false)
                tmp.append(buf)
            ;/

        ;; else if (is_whitespace(buf) == false)

            # First character of a fresh token: record where it starts
            /; if (tmp._len() == 0)
                tmp.line = line
                tmp.column = column
            ;/

            # Add non-whitespace
            tmp.append(buf)

        ;/

        /; if (buf == '\n')
            line++
            column = 0
        ;/

        read_count = 0
    ;/

    # Flush the token still being built when the input ran out
    /; if (tmp._len() > 0)
        tmp._type = get_tok_type(tmp)
        out_vect.push(~tmp)
    ;/

    _print_num(~w_tkn_gen{0}, out_vect._len())

    return out_vect
;/

{}uint8 w_SEP = "SEPARATOR\0"
{}uint8 w_DEL = "DELIMITER\0"
{}uint8 w_AUG = "AUGMENT\0"
{}uint8 w_KTP = "KEYTYPE\0"
{}uint8 w_KWD = "KEYWORD\0"
{}uint8 w_LIT = "LITERAL\0"
{}uint8 w_DEF = "DEFWORD\0"

# Map a TOKEN_TYPE value to a pointer to its zero-terminated name.
# Values that match no TOKEN_TYPE member yield "DEFWORD".
/; print_tok_type(uint tt) [~uint8]

    ~uint8 ptr = ~w_DEF{0}

    /; if (tt == TOKEN_TYPE.SEPARATOR)
        ptr = ~w_SEP{0}
    ;; else if (tt == TOKEN_TYPE.DELIMITER)
        ptr = ~w_DEL{0}
    ;; else if (tt == TOKEN_TYPE.AUGMENT)
        ptr = ~w_AUG{0}
    ;; else if (tt == TOKEN_TYPE.KEYTYPE)
        ptr = ~w_KTP{0}
    ;; else if (tt == TOKEN_TYPE.KEYWORD)
        ptr = ~w_KWD{0}
    ;; else if (tt == TOKEN_TYPE.LITERAL)
        ptr = ~w_LIT{0}
    ;; else if (tt == TOKEN_TYPE.DEFWORD)
        ptr = ~w_DEF{0}
    ;/

    return ptr

;/